From ce70a4ef4d6410d0a738a5440dd2b7d91c7e4822 Mon Sep 17 00:00:00 2001 From: sujithjay Date: Tue, 21 Aug 2018 13:50:48 +0530 Subject: [PATCH 0001/1072] [SPARK-24985][SQL] Fix OOM in Full Outer Join in presence of data skew. Change SortMergeJoinExec to use ExternalAppendOnlyUnsafeRowArray for Full Outer Join. This would spill data into disk if the buffered rows exceed beyond a threshold, thus preventing OOM errors. Change corresponding test case in JoinSuite. --- .../execution/joins/SortMergeJoinExec.scala | 62 +++++++++++-------- .../org/apache/spark/sql/JoinSuite.scala | 4 +- 2 files changed, 38 insertions(+), 28 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index f4b9d132122e..48584712a226 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -248,7 +248,9 @@ case class SortMergeJoinExec( rightIter = RowIterator.fromScala(rightIter), boundCondition, leftNullRow, - rightNullRow) + rightNullRow, + inMemoryThreshold, + spillThreshold) new FullOuterIterator( smjScanner, @@ -966,7 +968,9 @@ private class SortMergeFullOuterJoinScanner( rightIter: RowIterator, boundCondition: InternalRow => Boolean, leftNullRow: InternalRow, - rightNullRow: InternalRow) { + rightNullRow: InternalRow, + inMemoryThreshold: Int, + spillThreshold: Int) { private[this] val joinedRow: JoinedRow = new JoinedRow() private[this] var leftRow: InternalRow = _ private[this] var leftRowKey: InternalRow = _ @@ -975,8 +979,10 @@ private class SortMergeFullOuterJoinScanner( private[this] var leftIndex: Int = 0 private[this] var rightIndex: Int = 0 - private[this] val leftMatches: ArrayBuffer[InternalRow] = new ArrayBuffer[InternalRow] - private[this] val rightMatches: ArrayBuffer[InternalRow] = new ArrayBuffer[InternalRow] + private[this] val leftMatches: ExternalAppendOnlyUnsafeRowArray = + new ExternalAppendOnlyUnsafeRowArray(inMemoryThreshold, spillThreshold) + private[this] val rightMatches: ExternalAppendOnlyUnsafeRowArray = + new ExternalAppendOnlyUnsafeRowArray(inMemoryThreshold, spillThreshold) private[this] var leftMatched: BitSet = new BitSet(1) private[this] var rightMatched: BitSet = new BitSet(1) @@ -1028,23 +1034,23 @@ private class SortMergeFullOuterJoinScanner( rightIndex = 0 while (leftRowKey != null && keyOrdering.compare(leftRowKey, matchingKey) == 0) { - leftMatches += leftRow.copy() + leftMatches.add(leftRow.copy().asInstanceOf[UnsafeRow]) advancedLeft() } while (rightRowKey != null && keyOrdering.compare(rightRowKey, matchingKey) == 0) { - rightMatches += rightRow.copy() + rightMatches.add(rightRow.copy().asInstanceOf[UnsafeRow]) advancedRight() } - if (leftMatches.size <= leftMatched.capacity) { - leftMatched.clearUntil(leftMatches.size) + if (leftMatches.length <= leftMatched.capacity) { + leftMatched.clearUntil(leftMatches.length) } else { - leftMatched = new BitSet(leftMatches.size) + leftMatched = new BitSet(leftMatches.length) } - if (rightMatches.size <= rightMatched.capacity) { - rightMatched.clearUntil(rightMatches.size) + if (rightMatches.length <= rightMatched.capacity) { + rightMatched.clearUntil(rightMatches.length) } else { - rightMatched = new BitSet(rightMatches.size) + rightMatched = new BitSet(rightMatches.length) } } @@ -1058,31 +1064,37 @@ private class SortMergeFullOuterJoinScanner( * @return true if 
a valid match is found, false otherwise. */ private def scanNextInBuffered(): Boolean = { - while (leftIndex < leftMatches.size) { - while (rightIndex < rightMatches.size) { - joinedRow(leftMatches(leftIndex), rightMatches(rightIndex)) - if (boundCondition(joinedRow)) { - leftMatched.set(leftIndex) - rightMatched.set(rightIndex) + val leftMatchesIterator = leftMatches.generateIterator(leftIndex) + + while (leftMatchesIterator.hasNext) { + val leftCurRow = leftMatchesIterator.next() + val rightMatchesIterator = rightMatches.generateIterator(rightIndex) + while (rightMatchesIterator.hasNext) { + joinedRow(leftCurRow, rightMatchesIterator.next()) + if (boundCondition(joinedRow)) { + leftMatched.set(leftIndex) + rightMatched.set(rightIndex) + rightIndex += 1 + return true + } rightIndex += 1 - return true - } - rightIndex += 1 } rightIndex = 0 if (!leftMatched.get(leftIndex)) { // the left row has never matched any right row, join it with null row - joinedRow(leftMatches(leftIndex), rightNullRow) + joinedRow(leftCurRow, rightNullRow) leftIndex += 1 return true } leftIndex += 1 } - while (rightIndex < rightMatches.size) { + val rightMatchesIterator = rightMatches.generateIterator(rightIndex) + while (rightMatchesIterator.hasNext) { + val rightCurRow = rightMatchesIterator.next() if (!rightMatched.get(rightIndex)) { // the right row has never matched any left row, join it with null row - joinedRow(leftNullRow, rightMatches(rightIndex)) + joinedRow(leftNullRow, rightCurRow) rightIndex += 1 return true } @@ -1099,7 +1111,7 @@ private class SortMergeFullOuterJoinScanner( def advanceNext(): Boolean = { // If we already buffered some matching rows, use them directly - if (leftIndex <= leftMatches.size || rightIndex <= rightMatches.size) { + if (leftIndex <= leftMatches.length || rightIndex <= rightMatches.length) { if (scanNextInBuffered()) { return true } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 44767dfc9249..7535fb5a4a32 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -759,9 +759,7 @@ class JoinSuite extends QueryTest with SharedSQLContext { ) } - // FULL OUTER JOIN still does not use [[ExternalAppendOnlyUnsafeRowArray]] - // so should not cause any spill - assertNotSpilled(sparkContext, "full outer join") { + assertSpilled(sparkContext, "full outer join") { checkAnswer( sql( """ From d1ae0a40bd5ffdb65afd684a32370e8ab5217bf0 Mon Sep 17 00:00:00 2001 From: sujithjay Date: Tue, 21 Aug 2018 22:29:33 +0530 Subject: [PATCH 0002/1072] [SPARK-24985][SQL] Clean up unused imports --- .../apache/spark/sql/execution/joins/SortMergeJoinExec.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 48584712a226..c7c82681d741 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.execution.joins -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ From 121b3745b0fd22efab4825926389c54d841f08c3 Mon Sep 17 00:00:00 2001 From: sujithjay Date: Tue, 
21 Aug 2018 22:35:51 +0530 Subject: [PATCH 0003/1072] [SPARK-24985][SQL] Remove `copy()` on row instance before adding to `ExternalAppendOnlyUnsafeRowArray`. `ExternalAppendOnlyUnsafeRowArray` handles copy internally. --- .../apache/spark/sql/execution/joins/SortMergeJoinExec.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index c7c82681d741..ca77fe98d6ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -1032,11 +1032,11 @@ private class SortMergeFullOuterJoinScanner( rightIndex = 0 while (leftRowKey != null && keyOrdering.compare(leftRowKey, matchingKey) == 0) { - leftMatches.add(leftRow.copy().asInstanceOf[UnsafeRow]) + leftMatches.add(leftRow.asInstanceOf[UnsafeRow]) advancedLeft() } while (rightRowKey != null && keyOrdering.compare(rightRowKey, matchingKey) == 0) { - rightMatches.add(rightRow.copy().asInstanceOf[UnsafeRow]) + rightMatches.add(rightRow.asInstanceOf[UnsafeRow]) advancedRight() } From 09a91d98bdecb86ecad4647b7ef5fb3f69bdc671 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 28 Nov 2018 16:21:42 +0800 Subject: [PATCH 0004/1072] [SPARK-26021][SQL][FOLLOWUP] add test for special floating point values ## What changes were proposed in this pull request? a followup of https://github.com/apache/spark/pull/23043 . Add a test to show the minor behavior change introduced by #23043 , and add migration guide. ## How was this patch tested? a new test Closes #23141 from cloud-fan/follow. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../spark/unsafe/PlatformUtilSuite.java | 12 +++++--- docs/sql-migration-guide-upgrade.md | 6 ++-- .../catalyst/expressions/UnsafeArrayData.java | 6 ---- .../spark/sql/DatasetPrimitiveSuite.scala | 29 +++++++++++++++++++ .../org/apache/spark/sql/QueryTest.scala | 7 +++++ 5 files changed, 48 insertions(+), 12 deletions(-) diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java index ab34324eb54c..2474081dad5c 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java @@ -165,10 +165,14 @@ public void writeMinusZeroIsReplacedWithZero() { byte[] floatBytes = new byte[Float.BYTES]; Platform.putDouble(doubleBytes, Platform.BYTE_ARRAY_OFFSET, -0.0d); Platform.putFloat(floatBytes, Platform.BYTE_ARRAY_OFFSET, -0.0f); - double doubleFromPlatform = Platform.getDouble(doubleBytes, Platform.BYTE_ARRAY_OFFSET); - float floatFromPlatform = Platform.getFloat(floatBytes, Platform.BYTE_ARRAY_OFFSET); - Assert.assertEquals(Double.doubleToLongBits(0.0d), Double.doubleToLongBits(doubleFromPlatform)); - Assert.assertEquals(Float.floatToIntBits(0.0f), Float.floatToIntBits(floatFromPlatform)); + byte[] doubleBytes2 = new byte[Double.BYTES]; + byte[] floatBytes2 = new byte[Float.BYTES]; + Platform.putDouble(doubleBytes, Platform.BYTE_ARRAY_OFFSET, 0.0d); + Platform.putFloat(floatBytes, Platform.BYTE_ARRAY_OFFSET, 0.0f); + + // Make sure the bytes we write from 0.0 and -0.0 are same. 
+ Assert.assertArrayEquals(doubleBytes, doubleBytes2); + Assert.assertArrayEquals(floatBytes, floatBytes2); } } diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 68cb8f5a0d18..25cd54119091 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -17,14 +17,16 @@ displayTitle: Spark SQL Upgrading Guide - Since Spark 3.0, the `from_json` functions supports two modes - `PERMISSIVE` and `FAILFAST`. The modes can be set via the `mode` option. The default mode became `PERMISSIVE`. In previous versions, behavior of `from_json` did not conform to either `PERMISSIVE` nor `FAILFAST`, especially in processing of malformed JSON records. For example, the JSON string `{"a" 1}` with the schema `a INT` is converted to `null` by previous versions but Spark 3.0 converts it to `Row(null)`. - - In Spark version 2.4 and earlier, the `from_json` function produces `null`s for JSON strings and JSON datasource skips the same independetly of its mode if there is no valid root JSON token in its input (` ` for example). Since Spark 3.0, such input is treated as a bad record and handled according to specified mode. For example, in the `PERMISSIVE` mode the ` ` input is converted to `Row(null, null)` if specified schema is `key STRING, value INT`. + - In Spark version 2.4 and earlier, the `from_json` function produces `null`s for JSON strings and JSON datasource skips the same independetly of its mode if there is no valid root JSON token in its input (` ` for example). Since Spark 3.0, such input is treated as a bad record and handled according to specified mode. For example, in the `PERMISSIVE` mode the ` ` input is converted to `Row(null, null)` if specified schema is `key STRING, value INT`. - The `ADD JAR` command previously returned a result set with the single value 0. It now returns an empty result set. - In Spark version 2.4 and earlier, users can create map values with map type key via built-in function like `CreateMap`, `MapFromArrays`, etc. Since Spark 3.0, it's not allowed to create map values with map type key with these built-in functions. Users can still read map values with map type key from data source or Java/Scala collections, though they are not very useful. - + - In Spark version 2.4 and earlier, `Dataset.groupByKey` results to a grouped dataset with key attribute wrongly named as "value", if the key is non-struct type, e.g. int, string, array, etc. This is counterintuitive and makes the schema of aggregation queries weird. For example, the schema of `ds.groupByKey(...).count()` is `(value, count)`. Since Spark 3.0, we name the grouping attribute to "key". The old behaviour is preserved under a newly added configuration `spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue` with a default value of `false`. + - In Spark version 2.4 and earlier, float/double -0.0 is semantically equal to 0.0, but users can still distinguish them via `Dataset.show`, `Dataset.collect` etc. Since Spark 3.0, float/double -0.0 is replaced by 0.0 internally, and users can't distinguish them any more. + ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. 
This can cause some change in behavior and are illustrated in the table below. diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index 9002abdcfd47..d5f679fe23d4 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -334,17 +334,11 @@ public void setLong(int ordinal, long value) { } public void setFloat(int ordinal, float value) { - if (Float.isNaN(value)) { - value = Float.NaN; - } assertIndexIsValid(ordinal); Platform.putFloat(baseObject, getElementOffset(ordinal, 4), value); } public void setDouble(int ordinal, double value) { - if (Double.isNaN(value)) { - value = Double.NaN; - } assertIndexIsValid(ordinal); Platform.putDouble(baseObject, getElementOffset(ordinal, 8), value); } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala index 96a6792f52f3..0ded5d8ce1e2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala @@ -393,4 +393,33 @@ class DatasetPrimitiveSuite extends QueryTest with SharedSQLContext { val ds = spark.createDataset(data) checkDataset(ds, data: _*) } + + test("special floating point values") { + import org.scalatest.exceptions.TestFailedException + + // Spark treats -0.0 as 0.0 + intercept[TestFailedException] { + checkDataset(Seq(-0.0d).toDS(), -0.0d) + } + intercept[TestFailedException] { + checkDataset(Seq(-0.0f).toDS(), -0.0f) + } + intercept[TestFailedException] { + checkDataset(Seq(Tuple1(-0.0)).toDS(), Tuple1(-0.0)) + } + + val floats = Seq[Float](-0.0f, 0.0f, Float.NaN).toDS() + checkDataset(floats, 0.0f, 0.0f, Float.NaN) + + val doubles = Seq[Double](-0.0d, 0.0d, Double.NaN).toDS() + checkDataset(doubles, 0.0, 0.0, Double.NaN) + + checkDataset(Seq(Tuple1(Float.NaN)).toDS(), Tuple1(Float.NaN)) + checkDataset(Seq(Tuple1(-0.0f)).toDS(), Tuple1(0.0f)) + checkDataset(Seq(Tuple1(Double.NaN)).toDS(), Tuple1(Double.NaN)) + checkDataset(Seq(Tuple1(-0.0)).toDS(), Tuple1(0.0)) + + val complex = Map(Array(Seq(Tuple1(Double.NaN))) -> Map(Tuple2(Float.NaN, null))) + checkDataset(Seq(complex).toDS(), complex) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index 8ba67239fb90..a547676c5ed5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -132,6 +132,13 @@ abstract class QueryTest extends PlanTest { a.length == b.length && a.zip(b).forall { case (l, r) => compare(l, r)} case (a: Iterable[_], b: Iterable[_]) => a.size == b.size && a.zip(b).forall { case (l, r) => compare(l, r)} + case (a: Product, b: Product) => + compare(a.productIterator.toSeq, b.productIterator.toSeq) + // 0.0 == -0.0, turn float/double to binary before comparison, to distinguish 0.0 and -0.0. 
+ case (a: Double, b: Double) => + java.lang.Double.doubleToRawLongBits(a) == java.lang.Double.doubleToRawLongBits(b) + case (a: Float, b: Float) => + java.lang.Float.floatToRawIntBits(a) == java.lang.Float.floatToRawIntBits(b) case (a, b) => a == b } From 93112e693082f3fba24cebaf9a98dcf5c1eb84af Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Wed, 28 Nov 2018 20:18:13 +0800 Subject: [PATCH 0005/1072] [SPARK-26142][SQL] Implement shuffle read metrics in SQL ## What changes were proposed in this pull request? Implement `SQLShuffleMetricsReporter` on the sql side as the customized ShuffleMetricsReporter, which extended the `TempShuffleReadMetrics` and update SQLMetrics, in this way shuffle metrics can be reported in the SQL UI. ## How was this patch tested? Add UT in SQLMetricsSuite. Manual test locally, before: ![image](https://user-images.githubusercontent.com/4833765/48960517-30f97880-efa8-11e8-982c-92d05938fd1d.png) after: ![image](https://user-images.githubusercontent.com/4833765/48960587-b54bfb80-efa8-11e8-8e95-7a3c8c74cc5c.png) Closes #23128 from xuanyuanking/SPARK-26142. Lead-authored-by: Yuanjian Li Co-authored-by: liyuanjian Signed-off-by: Wenchen Fan --- .../spark/sql/execution/ShuffledRowRDD.scala | 9 ++- .../exchange/ShuffleExchangeExec.scala | 5 +- .../apache/spark/sql/execution/limit.scala | 10 ++- .../sql/execution/metric/SQLMetrics.scala | 20 ++++++ .../metric/SQLShuffleMetricsReporter.scala | 67 +++++++++++++++++++ .../execution/UnsafeRowSerializerSuite.scala | 5 +- .../execution/metric/SQLMetricsSuite.scala | 21 ++++-- 7 files changed, 126 insertions(+), 11 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala index 542266bc1ae0..9b05faaed045 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala @@ -22,6 +22,7 @@ import java.util.Arrays import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLShuffleMetricsReporter} /** * The [[Partition]] used by [[ShuffledRowRDD]]. A post-shuffle partition @@ -112,6 +113,7 @@ class CoalescedPartitioner(val parent: Partitioner, val partitionStartIndices: A */ class ShuffledRowRDD( var dependency: ShuffleDependency[Int, InternalRow, InternalRow], + metrics: Map[String, SQLMetric], specifiedPartitionStartIndices: Option[Array[Int]] = None) extends RDD[InternalRow](dependency.rdd.context, Nil) { @@ -154,7 +156,10 @@ class ShuffledRowRDD( override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { val shuffledRowPartition = split.asInstanceOf[ShuffledRowRDDPartition] - val metrics = context.taskMetrics().createTempShuffleReadMetrics() + val tempMetrics = context.taskMetrics().createTempShuffleReadMetrics() + // `SQLShuffleMetricsReporter` will update its own metrics for SQL exchange operator, + // as well as the `tempMetrics` for basic shuffle metrics. + val sqlMetricsReporter = new SQLShuffleMetricsReporter(tempMetrics, metrics) // The range of pre-shuffle partitions that we are fetching at here is // [startPreShufflePartitionIndex, endPreShufflePartitionIndex - 1]. 
val reader = @@ -163,7 +168,7 @@ class ShuffledRowRDD( shuffledRowPartition.startPreShufflePartitionIndex, shuffledRowPartition.endPreShufflePartitionIndex, context, - metrics) + sqlMetricsReporter) reader.read().asInstanceOf[Iterator[Product2[Int, InternalRow]]].map(_._2) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index d6742ab3e0f3..8938d93da90e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -48,7 +48,8 @@ case class ShuffleExchangeExec( // e.g. it can be null on the Executor side override lazy val metrics = Map( - "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size")) + "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size") + ) ++ SQLMetrics.getShuffleReadMetrics(sparkContext) override def nodeName: String = { val extraInfo = coordinator match { @@ -108,7 +109,7 @@ case class ShuffleExchangeExec( assert(newPartitioning.isInstanceOf[HashPartitioning]) newPartitioning = UnknownPartitioning(indices.length) } - new ShuffledRowRDD(shuffleDependency, specifiedPartitionStartIndices) + new ShuffledRowRDD(shuffleDependency, metrics, specifiedPartitionStartIndices) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index 90dafcf53591..ea845da8438f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGe import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.metric.SQLMetrics /** * Take the first `limit` elements and collect them to a single partition. 
@@ -37,11 +38,13 @@ case class CollectLimitExec(limit: Int, child: SparkPlan) extends UnaryExecNode override def outputPartitioning: Partitioning = SinglePartition override def executeCollect(): Array[InternalRow] = child.executeTake(limit) private val serializer: Serializer = new UnsafeRowSerializer(child.output.size) + override lazy val metrics = SQLMetrics.getShuffleReadMetrics(sparkContext) protected override def doExecute(): RDD[InternalRow] = { val locallyLimited = child.execute().mapPartitionsInternal(_.take(limit)) val shuffled = new ShuffledRowRDD( ShuffleExchangeExec.prepareShuffleDependency( - locallyLimited, child.output, SinglePartition, serializer)) + locallyLimited, child.output, SinglePartition, serializer), + metrics) shuffled.mapPartitionsInternal(_.take(limit)) } } @@ -151,6 +154,8 @@ case class TakeOrderedAndProjectExec( private val serializer: Serializer = new UnsafeRowSerializer(child.output.size) + override lazy val metrics = SQLMetrics.getShuffleReadMetrics(sparkContext) + protected override def doExecute(): RDD[InternalRow] = { val ord = new LazilyGeneratedOrdering(sortOrder, child.output) val localTopK: RDD[InternalRow] = { @@ -160,7 +165,8 @@ case class TakeOrderedAndProjectExec( } val shuffled = new ShuffledRowRDD( ShuffleExchangeExec.prepareShuffleDependency( - localTopK, child.output, SinglePartition, serializer)) + localTopK, child.output, SinglePartition, serializer), + metrics) shuffled.mapPartitions { iter => val topK = org.apache.spark.util.collection.Utils.takeOrdered(iter.map(_.copy()), limit)(ord) if (projectList != child.output) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala index cbf707f4a9cf..0b5ee3a5e057 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala @@ -82,6 +82,14 @@ object SQLMetrics { private val baseForAvgMetric: Int = 10 + val REMOTE_BLOCKS_FETCHED = "remoteBlocksFetched" + val LOCAL_BLOCKS_FETCHED = "localBlocksFetched" + val REMOTE_BYTES_READ = "remoteBytesRead" + val REMOTE_BYTES_READ_TO_DISK = "remoteBytesReadToDisk" + val LOCAL_BYTES_READ = "localBytesRead" + val FETCH_WAIT_TIME = "fetchWaitTime" + val RECORDS_READ = "recordsRead" + /** * Converts a double value to long value by multiplying a base integer, so we can store it in * `SQLMetrics`. It only works for average metrics. When showing the metrics on UI, we restore @@ -194,4 +202,16 @@ object SQLMetrics { SparkListenerDriverAccumUpdates(executionId.toLong, metrics.map(m => m.id -> m.value))) } } + + /** + * Create all shuffle read relative metrics and return the Map. 
+ */ + def getShuffleReadMetrics(sc: SparkContext): Map[String, SQLMetric] = Map( + REMOTE_BLOCKS_FETCHED -> createMetric(sc, "remote blocks fetched"), + LOCAL_BLOCKS_FETCHED -> createMetric(sc, "local blocks fetched"), + REMOTE_BYTES_READ -> createSizeMetric(sc, "remote bytes read"), + REMOTE_BYTES_READ_TO_DISK -> createSizeMetric(sc, "remote bytes read to disk"), + LOCAL_BYTES_READ -> createSizeMetric(sc, "local bytes read"), + FETCH_WAIT_TIME -> createTimingMetric(sc, "fetch wait time"), + RECORDS_READ -> createMetric(sc, "records read")) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala new file mode 100644 index 000000000000..542141ea4b4e --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.metric + +import org.apache.spark.executor.TempShuffleReadMetrics + +/** + * A shuffle metrics reporter for SQL exchange operators. + * @param tempMetrics [[TempShuffleReadMetrics]] created in TaskContext. + * @param metrics All metrics in current SparkPlan. This param should not empty and + * contains all shuffle metrics defined in [[SQLMetrics.getShuffleReadMetrics]]. 
+ */ +private[spark] class SQLShuffleMetricsReporter( + tempMetrics: TempShuffleReadMetrics, + metrics: Map[String, SQLMetric]) extends TempShuffleReadMetrics { + private[this] val _remoteBlocksFetched = metrics(SQLMetrics.REMOTE_BLOCKS_FETCHED) + private[this] val _localBlocksFetched = metrics(SQLMetrics.LOCAL_BLOCKS_FETCHED) + private[this] val _remoteBytesRead = metrics(SQLMetrics.REMOTE_BYTES_READ) + private[this] val _remoteBytesReadToDisk = metrics(SQLMetrics.REMOTE_BYTES_READ_TO_DISK) + private[this] val _localBytesRead = metrics(SQLMetrics.LOCAL_BYTES_READ) + private[this] val _fetchWaitTime = metrics(SQLMetrics.FETCH_WAIT_TIME) + private[this] val _recordsRead = metrics(SQLMetrics.RECORDS_READ) + + override def incRemoteBlocksFetched(v: Long): Unit = { + _remoteBlocksFetched.add(v) + tempMetrics.incRemoteBlocksFetched(v) + } + override def incLocalBlocksFetched(v: Long): Unit = { + _localBlocksFetched.add(v) + tempMetrics.incLocalBlocksFetched(v) + } + override def incRemoteBytesRead(v: Long): Unit = { + _remoteBytesRead.add(v) + tempMetrics.incRemoteBytesRead(v) + } + override def incRemoteBytesReadToDisk(v: Long): Unit = { + _remoteBytesReadToDisk.add(v) + tempMetrics.incRemoteBytesReadToDisk(v) + } + override def incLocalBytesRead(v: Long): Unit = { + _localBytesRead.add(v) + tempMetrics.incLocalBytesRead(v) + } + override def incFetchWaitTime(v: Long): Unit = { + _fetchWaitTime.add(v) + tempMetrics.incFetchWaitTime(v) + } + override def incRecordsRead(v: Long): Unit = { + _recordsRead.add(v) + tempMetrics.incRecordsRead(v) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala index d305ce3e698a..96b3aa5ee75b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{LocalSparkSession, Row, SparkSession} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.sql.types._ import org.apache.spark.storage.ShuffleBlockId import org.apache.spark.util.collection.ExternalSorter @@ -137,7 +138,9 @@ class UnsafeRowSerializerSuite extends SparkFunSuite with LocalSparkSession { rowsRDD, new PartitionIdPassthrough(2), new UnsafeRowSerializer(2)) - val shuffled = new ShuffledRowRDD(dependency) + val shuffled = new ShuffledRowRDD( + dependency, + SQLMetrics.getShuffleReadMetrics(spark.sparkContext)) shuffled.count() } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index b955c157a620..0f1d08b6af5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -94,8 +94,13 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared "avg hash probe (min, med, max)" -> "\n(1, 1, 1)"), Map("number of output rows" -> 1L, "avg hash probe (min, med, max)" -> "\n(1, 1, 1)")) + val shuffleExpected1 = Map( + "records read" -> 2L, + "local blocks fetched" -> 2L, + "remote blocks fetched" -> 0L) 
testSparkPlanMetrics(df, 1, Map( 2L -> (("HashAggregate", expected1(0))), + 1L -> (("Exchange", shuffleExpected1)), 0L -> (("HashAggregate", expected1(1)))) ) @@ -106,8 +111,13 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared "avg hash probe (min, med, max)" -> "\n(1, 1, 1)"), Map("number of output rows" -> 3L, "avg hash probe (min, med, max)" -> "\n(1, 1, 1)")) + val shuffleExpected2 = Map( + "records read" -> 4L, + "local blocks fetched" -> 4L, + "remote blocks fetched" -> 0L) testSparkPlanMetrics(df2, 1, Map( 2L -> (("HashAggregate", expected2(0))), + 1L -> (("Exchange", shuffleExpected2)), 0L -> (("HashAggregate", expected2(1)))) ) } @@ -191,7 +201,11 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared testSparkPlanMetrics(df, 1, Map( 0L -> (("SortMergeJoin", Map( // It's 4 because we only read 3 rows in the first partition and 1 row in the second one - "number of output rows" -> 4L)))) + "number of output rows" -> 4L))), + 2L -> (("Exchange", Map( + "records read" -> 4L, + "local blocks fetched" -> 2L, + "remote blocks fetched" -> 0L)))) ) } } @@ -208,7 +222,7 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared "SELECT * FROM testData2 left JOIN testDataForJoin ON testData2.a = testDataForJoin.a") testSparkPlanMetrics(df, 1, Map( 0L -> (("SortMergeJoin", Map( - // It's 4 because we only read 3 rows in the first partition and 1 row in the second one + // It's 8 because we read 6 rows in the left and 2 row in the right one "number of output rows" -> 8L)))) ) @@ -216,7 +230,7 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared "SELECT * FROM testDataForJoin right JOIN testData2 ON testData2.a = testDataForJoin.a") testSparkPlanMetrics(df2, 1, Map( 0L -> (("SortMergeJoin", Map( - // It's 4 because we only read 3 rows in the first partition and 1 row in the second one + // It's 8 because we read 6 rows in the left and 2 row in the right one "number of output rows" -> 8L)))) ) } @@ -287,7 +301,6 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared // Assume the execution plan is // ... -> ShuffledHashJoin(nodeId = 1) -> Project(nodeId = 0) val df = df1.join(df2, "key") - val metrics = getSparkPlanMetrics(df, 1, Set(1L)) testSparkPlanMetrics(df, 1, Map( 1L -> (("ShuffledHashJoin", Map( "number of output rows" -> 2L, From 438f8fd675d8f819373b6643dea3a77d954b6822 Mon Sep 17 00:00:00 2001 From: Sergey Zhemzhitsky Date: Wed, 28 Nov 2018 20:22:24 +0800 Subject: [PATCH 0006/1072] [SPARK-26114][CORE] ExternalSorter's readingIterator field leak ## What changes were proposed in this pull request? This pull request fixes [SPARK-26114](https://issues.apache.org/jira/browse/SPARK-26114) issue that occurs when trying to reduce the number of partitions by means of coalesce without shuffling after shuffle-based transformations. The leak occurs because of not cleaning up `ExternalSorter`'s `readingIterator` field as it's done for its `map` and `buffer` fields. Additionally there are changes to the `CompletionIterator` to prevent capturing its `sub`-iterator and holding it even after the completion iterator completes. It is necessary because in some cases, e.g. in case of standard scala's `flatMap` iterator (which is used is `CoalescedRDD`'s `compute` method) the next value of the main iterator is assigned to `flatMap`'s `cur` field only after it is available. 
For DAGs where ShuffledRDD is a parent of CoalescedRDD it means that the data should be fetched from the map-side of the shuffle, but the process of fetching this data consumes quite a lot of memory in addition to the memory already consumed by the iterator held by `flatMap`'s `cur` field (until it is reassigned). For the following data ```scala import org.apache.hadoop.io._ import org.apache.hadoop.io.compress._ import org.apache.commons.lang._ import org.apache.spark._ // generate 100M records of sample data sc.makeRDD(1 to 1000, 1000) .flatMap(item => (1 to 100000) .map(i => new Text(RandomStringUtils.randomAlphanumeric(3).toLowerCase) -> new Text(RandomStringUtils.randomAlphanumeric(1024)))) .saveAsSequenceFile("/tmp/random-strings", Some(classOf[GzipCodec])) ``` and the following job ```scala import org.apache.hadoop.io._ import org.apache.spark._ import org.apache.spark.storage._ val rdd = sc.sequenceFile("/tmp/random-strings", classOf[Text], classOf[Text]) rdd .map(item => item._1.toString -> item._2.toString) .repartitionAndSortWithinPartitions(new HashPartitioner(1000)) .coalesce(10,false) .count ``` ... executed like the following ```bash spark-shell \ --num-executors=5 \ --executor-cores=2 \ --master=yarn \ --deploy-mode=client \ --conf spark.executor.memoryOverhead=512 \ --conf spark.executor.memory=1g \ --conf spark.dynamicAllocation.enabled=false \ --conf spark.executor.extraJavaOptions='-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp -Dio.netty.noUnsafe=true' ``` ... executors are always failing with OutOfMemoryErrors. The main issue is multiple leaks of ExternalSorter references. For example, in case of 2 tasks per executor it is expected to be 2 simultaneous instances of ExternalSorter per executor but heap dump generated on OutOfMemoryError shows that there are more ones. ![run1-noparams-dominator-tree-externalsorter](https://user-images.githubusercontent.com/1523889/48703665-782ce580-ec05-11e8-95a9-d6c94e8285ab.png) P.S. This PR does not cover cases with CoGroupedRDDs which use ExternalAppendOnlyMap internally, which itself can lead to OutOfMemoryErrors in many places. ## How was this patch tested? - Existing unit tests - New unit tests - Job executions on the live environment Here is the screenshot before applying this patch ![run3-noparams-failure-ui-5x2-repartition-and-sort](https://user-images.githubusercontent.com/1523889/48700395-f769eb80-ebfc-11e8-831b-e94c757d416c.png) Here is the screenshot after applying this patch ![run3-noparams-success-ui-5x2-repartition-and-sort](https://user-images.githubusercontent.com/1523889/48700610-7a8b4180-ebfd-11e8-9761-baaf38a58e66.png) And in case of reducing the number of executors even more the job is still stable ![run3-noparams-success-ui-2x2-repartition-and-sort](https://user-images.githubusercontent.com/1523889/48700619-82e37c80-ebfd-11e8-98ed-a38e1f1f1fd9.png) Closes #23083 from szhem/SPARK-26114-externalsorter-leak. 
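The `CompletionIterator` change itself is only a few lines, so here is a minimal, self-contained sketch of the pattern it follows (illustrative only, not code from this patch): wrap an iterator and drop the reference to it as soon as it is exhausted, so that anything it pins can be garbage collected even while the wrapper object stays reachable.

```scala
object CompletionIteratorSketch {
  // Wrap `sub`; once it is drained, run `onComplete` and release the reference to `sub`.
  def wrap[A](sub: Iterator[A])(onComplete: => Unit): Iterator[A] = new Iterator[A] {
    private[this] var iter: Iterator[A] = sub
    private[this] var completed = false
    override def next(): A = iter.next()
    override def hasNext: Boolean = {
      val r = iter.hasNext
      if (!r && !completed) {
        completed = true
        iter = Iterator.empty // release the sub-iterator so its resources can be collected early
        onComplete
      }
      r
    }
  }

  def main(args: Array[String]): Unit = {
    val it = wrap(Iterator(1, 2, 3))(println("completed"))
    println(it.sum) // "completed" is printed while draining, then 6
  }
}
```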
Authored-by: Sergey Zhemzhitsky Signed-off-by: Wenchen Fan --- .../spark/util/CompletionIterator.scala | 7 ++++-- .../util/collection/ExternalSorter.scala | 3 ++- .../spark/util/CompletionIteratorSuite.scala | 22 +++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala index 21acaa95c564..f4d6c7a28d2e 100644 --- a/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala +++ b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala @@ -25,11 +25,14 @@ private[spark] abstract class CompletionIterator[ +A, +I <: Iterator[A]](sub: I) extends Iterator[A] { private[this] var completed = false - def next(): A = sub.next() + private[this] var iter = sub + def next(): A = iter.next() def hasNext: Boolean = { - val r = sub.hasNext + val r = iter.hasNext if (!r && !completed) { completed = true + // reassign to release resources of highly resource consuming iterators early + iter = Iterator.empty.asInstanceOf[I] completion() } r diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala index eac3db01158d..46279e79d78d 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala @@ -727,9 +727,10 @@ private[spark] class ExternalSorter[K, V, C]( spills.clear() forceSpillFiles.foreach(s => s.file.delete()) forceSpillFiles.clear() - if (map != null || buffer != null) { + if (map != null || buffer != null || readingIterator != null) { map = null // So that the memory can be garbage-collected buffer = null // So that the memory can be garbage-collected + readingIterator = null // So that the memory can be garbage-collected releaseMemory() } } diff --git a/core/src/test/scala/org/apache/spark/util/CompletionIteratorSuite.scala b/core/src/test/scala/org/apache/spark/util/CompletionIteratorSuite.scala index 688fcd9f9aab..29421f7aa9e3 100644 --- a/core/src/test/scala/org/apache/spark/util/CompletionIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/CompletionIteratorSuite.scala @@ -17,6 +17,9 @@ package org.apache.spark.util +import java.lang.ref.PhantomReference +import java.lang.ref.ReferenceQueue + import org.apache.spark.SparkFunSuite class CompletionIteratorSuite extends SparkFunSuite { @@ -44,4 +47,23 @@ class CompletionIteratorSuite extends SparkFunSuite { assert(!completionIter.hasNext) assert(numTimesCompleted === 1) } + test("reference to sub iterator should not be available after completion") { + var sub = Iterator(1, 2, 3) + + val refQueue = new ReferenceQueue[Iterator[Int]] + val ref = new PhantomReference[Iterator[Int]](sub, refQueue) + + val iter = CompletionIterator[Int, Iterator[Int]](sub, {}) + sub = null + iter.toArray + + for (_ <- 1 to 100 if !ref.isEnqueued) { + System.gc() + if (!ref.isEnqueued) { + Thread.sleep(10) + } + } + assert(ref.isEnqueued) + assert(refQueue.poll() === ref) + } } From affe80958d366f399466a9dba8e03da7f3b7b9bf Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 28 Nov 2018 20:38:42 +0800 Subject: [PATCH 0007/1072] [SPARK-26147][SQL] only pull out unevaluable python udf from join condition ## What changes were proposed in this pull request? https://github.com/apache/spark/pull/22326 made a mistake that, not all python UDFs are unevaluable in join condition. 
Only python UDFs that refer to attributes from both join side are unevaluable. This PR fixes this mistake. ## How was this patch tested? a new test Closes #23153 from cloud-fan/join. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- python/pyspark/sql/tests/test_udf.py | 12 ++ .../spark/sql/catalyst/optimizer/joins.scala | 22 ++-- ...PullOutPythonUDFInJoinConditionSuite.scala | 120 ++++++++++++------ 3 files changed, 106 insertions(+), 48 deletions(-) diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index d2dfb52f5447..ed298f724d55 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -209,6 +209,18 @@ def test_udf_in_join_condition(self): with self.sql_conf({"spark.sql.crossJoin.enabled": True}): self.assertEqual(df.collect(), [Row(a=1, b=1)]) + def test_udf_in_left_outer_join_condition(self): + # regression test for SPARK-26147 + from pyspark.sql.functions import udf, col + left = self.spark.createDataFrame([Row(a=1)]) + right = self.spark.createDataFrame([Row(b=1)]) + f = udf(lambda a: str(a), StringType()) + # The join condition can't be pushed down, as it refers to attributes from both sides. + # The Python UDF only refer to attributes from one side, so it's evaluable. + df = left.join(right, f("a") == col("b").cast("string"), how="left_outer") + with self.sql_conf({"spark.sql.crossJoin.enabled": True}): + self.assertEqual(df.collect(), [Row(a=1, b=1)]) + def test_udf_in_left_semi_join_condition(self): # regression test for SPARK-25314 from pyspark.sql.functions import udf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala index 7149edee0173..6ebb194d71c2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala @@ -155,19 +155,20 @@ object EliminateOuterJoin extends Rule[LogicalPlan] with PredicateHelper { } /** - * PythonUDF in join condition can not be evaluated, this rule will detect the PythonUDF - * and pull them out from join condition. For python udf accessing attributes from only one side, - * they are pushed down by operation push down rules. If not (e.g. user disables filter push - * down rules), we need to pull them out in this rule too. + * PythonUDF in join condition can't be evaluated if it refers to attributes from both join sides. + * See `ExtractPythonUDFs` for details. This rule will detect un-evaluable PythonUDF and pull them + * out from join condition. 
*/ object PullOutPythonUDFInJoinCondition extends Rule[LogicalPlan] with PredicateHelper { - def hasPythonUDF(expression: Expression): Boolean = { - expression.collectFirst { case udf: PythonUDF => udf }.isDefined + + private def hasUnevaluablePythonUDF(expr: Expression, j: Join): Boolean = { + expr.find { e => + PythonUDF.isScalarPythonUDF(e) && !canEvaluate(e, j.left) && !canEvaluate(e, j.right) + }.isDefined } override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { - case j @ Join(_, _, joinType, condition) - if condition.isDefined && hasPythonUDF(condition.get) => + case j @ Join(_, _, joinType, Some(cond)) if hasUnevaluablePythonUDF(cond, j) => if (!joinType.isInstanceOf[InnerLike] && joinType != LeftSemi) { // The current strategy only support InnerLike and LeftSemi join because for other type, // it breaks SQL semantic if we run the join condition as a filter after join. If we pass @@ -179,10 +180,9 @@ object PullOutPythonUDFInJoinCondition extends Rule[LogicalPlan] with PredicateH } // If condition expression contains python udf, it will be moved out from // the new join conditions. - val (udf, rest) = - splitConjunctivePredicates(condition.get).partition(hasPythonUDF) + val (udf, rest) = splitConjunctivePredicates(cond).partition(hasUnevaluablePythonUDF(_, j)) val newCondition = if (rest.isEmpty) { - logWarning(s"The join condition:$condition of the join plan contains PythonUDF only," + + logWarning(s"The join condition:$cond of the join plan contains PythonUDF only," + s" it will be moved out and the join plan will be turned to cross join.") None } else { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullOutPythonUDFInJoinConditionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullOutPythonUDFInJoinConditionSuite.scala index d3867f2b6bd0..3f1c91df7f2e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullOutPythonUDFInJoinConditionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullOutPythonUDFInJoinConditionSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.catalyst.optimizer -import org.scalatest.Matchers._ - import org.apache.spark.api.python.PythonEvalType import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -28,7 +26,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.internal.SQLConf._ -import org.apache.spark.sql.types.BooleanType +import org.apache.spark.sql.types.{BooleanType, IntegerType} class PullOutPythonUDFInJoinConditionSuite extends PlanTest { @@ -40,13 +38,29 @@ class PullOutPythonUDFInJoinConditionSuite extends PlanTest { CheckCartesianProducts) :: Nil } - val testRelationLeft = LocalRelation('a.int, 'b.int) - val testRelationRight = LocalRelation('c.int, 'd.int) + val attrA = 'a.int + val attrB = 'b.int + val attrC = 'c.int + val attrD = 'd.int + + val testRelationLeft = LocalRelation(attrA, attrB) + val testRelationRight = LocalRelation(attrC, attrD) + + // This join condition refers to attributes from 2 tables, but the PythonUDF inside it only + // refer to attributes from one side. 
+ val evaluableJoinCond = { + val pythonUDF = PythonUDF("evaluable", null, + IntegerType, + Seq(attrA), + PythonEvalType.SQL_BATCHED_UDF, + udfDeterministic = true) + pythonUDF === attrC + } - // Dummy python UDF for testing. Unable to execute. - val pythonUDF = PythonUDF("pythonUDF", null, + // This join condition is a PythonUDF which refers to attributes from 2 tables. + val unevaluableJoinCond = PythonUDF("unevaluable", null, BooleanType, - Seq.empty, + Seq(attrA, attrC), PythonEvalType.SQL_BATCHED_UDF, udfDeterministic = true) @@ -66,62 +80,76 @@ class PullOutPythonUDFInJoinConditionSuite extends PlanTest { } } - test("inner join condition with python udf only") { - val query = testRelationLeft.join( + test("inner join condition with python udf") { + val query1 = testRelationLeft.join( testRelationRight, joinType = Inner, - condition = Some(pythonUDF)) - val expected = testRelationLeft.join( + condition = Some(unevaluableJoinCond)) + val expected1 = testRelationLeft.join( testRelationRight, joinType = Inner, - condition = None).where(pythonUDF).analyze - comparePlanWithCrossJoinEnable(query, expected) + condition = None).where(unevaluableJoinCond).analyze + comparePlanWithCrossJoinEnable(query1, expected1) + + // evaluable PythonUDF will not be touched + val query2 = testRelationLeft.join( + testRelationRight, + joinType = Inner, + condition = Some(evaluableJoinCond)) + comparePlans(Optimize.execute(query2), query2) } - test("left semi join condition with python udf only") { - val query = testRelationLeft.join( + test("left semi join condition with python udf") { + val query1 = testRelationLeft.join( testRelationRight, joinType = LeftSemi, - condition = Some(pythonUDF)) - val expected = testRelationLeft.join( + condition = Some(unevaluableJoinCond)) + val expected1 = testRelationLeft.join( testRelationRight, joinType = Inner, - condition = None).where(pythonUDF).select('a, 'b).analyze - comparePlanWithCrossJoinEnable(query, expected) + condition = None).where(unevaluableJoinCond).select('a, 'b).analyze + comparePlanWithCrossJoinEnable(query1, expected1) + + // evaluable PythonUDF will not be touched + val query2 = testRelationLeft.join( + testRelationRight, + joinType = LeftSemi, + condition = Some(evaluableJoinCond)) + comparePlans(Optimize.execute(query2), query2) } - test("python udf and common condition") { + test("unevaluable python udf and common condition") { val query = testRelationLeft.join( testRelationRight, joinType = Inner, - condition = Some(pythonUDF && 'a.attr === 'c.attr)) + condition = Some(unevaluableJoinCond && 'a.attr === 'c.attr)) val expected = testRelationLeft.join( testRelationRight, joinType = Inner, - condition = Some('a.attr === 'c.attr)).where(pythonUDF).analyze + condition = Some('a.attr === 'c.attr)).where(unevaluableJoinCond).analyze val optimized = Optimize.execute(query.analyze) comparePlans(optimized, expected) } - test("python udf or common condition") { + test("unevaluable python udf or common condition") { val query = testRelationLeft.join( testRelationRight, joinType = Inner, - condition = Some(pythonUDF || 'a.attr === 'c.attr)) + condition = Some(unevaluableJoinCond || 'a.attr === 'c.attr)) val expected = testRelationLeft.join( testRelationRight, joinType = Inner, - condition = None).where(pythonUDF || 'a.attr === 'c.attr).analyze + condition = None).where(unevaluableJoinCond || 'a.attr === 'c.attr).analyze comparePlanWithCrossJoinEnable(query, expected) } - test("pull out whole complex condition with multiple python udf") { + test("pull out 
whole complex condition with multiple unevaluable python udf") { val pythonUDF1 = PythonUDF("pythonUDF1", null, BooleanType, - Seq.empty, + Seq(attrA, attrC), PythonEvalType.SQL_BATCHED_UDF, udfDeterministic = true) - val condition = (pythonUDF || 'a.attr === 'c.attr) && pythonUDF1 + val condition = (unevaluableJoinCond || 'a.attr === 'c.attr) && pythonUDF1 val query = testRelationLeft.join( testRelationRight, @@ -134,13 +162,13 @@ class PullOutPythonUDFInJoinConditionSuite extends PlanTest { comparePlanWithCrossJoinEnable(query, expected) } - test("partial pull out complex condition with multiple python udf") { + test("partial pull out complex condition with multiple unevaluable python udf") { val pythonUDF1 = PythonUDF("pythonUDF1", null, BooleanType, - Seq.empty, + Seq(attrA, attrC), PythonEvalType.SQL_BATCHED_UDF, udfDeterministic = true) - val condition = (pythonUDF || pythonUDF1) && 'a.attr === 'c.attr + val condition = (unevaluableJoinCond || pythonUDF1) && 'a.attr === 'c.attr val query = testRelationLeft.join( testRelationRight, @@ -149,23 +177,41 @@ class PullOutPythonUDFInJoinConditionSuite extends PlanTest { val expected = testRelationLeft.join( testRelationRight, joinType = Inner, - condition = Some('a.attr === 'c.attr)).where(pythonUDF || pythonUDF1).analyze + condition = Some('a.attr === 'c.attr)).where(unevaluableJoinCond || pythonUDF1).analyze + val optimized = Optimize.execute(query.analyze) + comparePlans(optimized, expected) + } + + test("pull out unevaluable python udf when it's mixed with evaluable one") { + val query = testRelationLeft.join( + testRelationRight, + joinType = Inner, + condition = Some(evaluableJoinCond && unevaluableJoinCond)) + val expected = testRelationLeft.join( + testRelationRight, + joinType = Inner, + condition = Some(evaluableJoinCond)).where(unevaluableJoinCond).analyze val optimized = Optimize.execute(query.analyze) comparePlans(optimized, expected) } test("throw an exception for not support join type") { for (joinType <- unsupportedJoinTypes) { - val thrownException = the [AnalysisException] thrownBy { + val e = intercept[AnalysisException] { val query = testRelationLeft.join( testRelationRight, joinType, - condition = Some(pythonUDF)) + condition = Some(unevaluableJoinCond)) Optimize.execute(query.analyze) } - assert(thrownException.message.contentEquals( + assert(e.message.contentEquals( s"Using PythonUDF in join condition of join type $joinType is not supported.")) + + val query2 = testRelationLeft.join( + testRelationRight, + joinType, + condition = Some(evaluableJoinCond)) + comparePlans(Optimize.execute(query2), query2) } } } - From ce61bac1d84f8577b180400e44bd9bf22292e0b6 Mon Sep 17 00:00:00 2001 From: Mark Pavey Date: Wed, 28 Nov 2018 07:19:47 -0800 Subject: [PATCH 0008/1072] =?UTF-8?q?[SPARK-26137][CORE]=20Use=20Java=20sy?= =?UTF-8?q?stem=20property=20"file.separator"=20inste=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … of hard coded "/" in DependencyUtils ## What changes were proposed in this pull request? Use Java system property "file.separator" instead of hard coded "/" in DependencyUtils. ## How was this patch tested? Manual test: Submit Spark application via REST API that reads data from Elasticsearch using spark-elasticsearch library. 
Without fix application fails with error: 18/11/22 10:36:20 ERROR Version: Multiple ES-Hadoop versions detected in the classpath; please use only one jar:file:/C:/<...>/spark-2.4.0-bin-hadoop2.6/work/driver-20181122103610-0001/myApp-assembly-1.0.jar jar:file:/C:/<...>/myApp-assembly-1.0.jar 18/11/22 10:36:20 ERROR Main: Application [MyApp] failed: java.lang.Error: Multiple ES-Hadoop versions detected in the classpath; please use only one jar:file:/C:/<...>/spark-2.4.0-bin-hadoop2.6/work/driver-20181122103610-0001/myApp-assembly-1.0.jar jar:file:/C:/<...>/myApp-assembly-1.0.jar at org.elasticsearch.hadoop.util.Version.(Version.java:73) at org.elasticsearch.hadoop.rest.RestService.findPartitions(RestService.java:214) at org.elasticsearch.spark.rdd.AbstractEsRDD.esPartitions$lzycompute(AbstractEsRDD.scala:73) at org.elasticsearch.spark.rdd.AbstractEsRDD.esPartitions(AbstractEsRDD.scala:72) at org.elasticsearch.spark.rdd.AbstractEsRDD.getPartitions(AbstractEsRDD.scala:44) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251) at scala.Option.getOrElse(Option.scala:121) at org.apache.spark.rdd.RDD.partitions(RDD.scala:251) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251) at scala.Option.getOrElse(Option.scala:121) at org.apache.spark.rdd.RDD.partitions(RDD.scala:251) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126) at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:945) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) at org.apache.spark.rdd.RDD.collect(RDD.scala:944) ... at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.worker.DriverWrapper$.main(DriverWrapper.scala:65) at org.apache.spark.deploy.worker.DriverWrapper.main(DriverWrapper.scala) With fix application runs successfully. Closes #23102 from markpavey/JIRA_SPARK-26137_DependencyUtilsFileSeparatorFix. 
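The failure mode is easiest to see in isolation. The sketch below (illustrative only, not part of this patch) shows why splitting on a hard coded "/" cannot extract the jar name from a Windows-style path; the actual fix is the one-line switch to `File.separatorChar` in `DependencyUtils.resolveAndDownloadJars`.

```scala
object SeparatorSketch {
  def main(args: Array[String]): Unit = {
    // A Windows-style local path, as produced for the downloaded driver jar in the report above.
    val windowsPath = "C:\\work\\driver-20181122103610-0001\\myApp-assembly-1.0.jar"

    // Splitting on a hard coded "/" yields a single element, so `.last` is the whole
    // path and the jar-name filter never matches the user jar.
    println(windowsPath.split("/").last)  // C:\work\driver-20181122103610-0001\myApp-assembly-1.0.jar

    // Splitting on the platform separator extracts the file name; on Windows,
    // java.io.File.separatorChar is '\', which is what the patched code uses.
    println(windowsPath.split('\\').last) // myApp-assembly-1.0.jar
  }
}
```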
Authored-by: Mark Pavey Signed-off-by: Sean Owen --- .../apache/spark/deploy/DependencyUtils.scala | 3 ++- .../spark/deploy/SparkSubmitSuite.scala | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala index 178bdcfccb60..5a17a6b6e169 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala @@ -61,11 +61,12 @@ private[deploy] object DependencyUtils extends Logging { hadoopConf: Configuration, secMgr: SecurityManager): String = { val targetDir = Utils.createTempDir() + val userJarName = userJar.split(File.separatorChar).last Option(jars) .map { resolveGlobPaths(_, hadoopConf) .split(",") - .filterNot(_.contains(userJar.split("/").last)) + .filterNot(_.contains(userJarName)) .mkString(",") } .filterNot(_ == "") diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 652c36ffa6e7..c093789244bf 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -962,6 +962,25 @@ class SparkSubmitSuite } } + test("remove copies of application jar from classpath") { + val fs = File.separator + val sparkConf = new SparkConf(false) + val hadoopConf = new Configuration() + val secMgr = new SecurityManager(sparkConf) + + val appJarName = "myApp.jar" + val jar1Name = "myJar1.jar" + val jar2Name = "myJar2.jar" + val userJar = s"file:/path${fs}to${fs}app${fs}jar$fs$appJarName" + val jars = s"file:/$jar1Name,file:/$appJarName,file:/$jar2Name" + + val resolvedJars = DependencyUtils + .resolveAndDownloadJars(jars, userJar, sparkConf, hadoopConf, secMgr) + + assert(!resolvedJars.contains(appJarName)) + assert(resolvedJars.contains(jar1Name) && resolvedJars.contains(jar2Name)) + } + test("Avoid re-upload remote resources in yarn client mode") { val hadoopConf = new Configuration() updateConfWithFakeS3Fs(hadoopConf) From 87bd9c75df6b67bef903751269a4fd381f9140d9 Mon Sep 17 00:00:00 2001 From: Brandon Krieger Date: Wed, 28 Nov 2018 07:22:48 -0800 Subject: [PATCH 0009/1072] [SPARK-25998][CORE] Change TorrentBroadcast to hold weak reference of broadcast object ## What changes were proposed in this pull request? This PR changes the broadcast object in TorrentBroadcast from a strong reference to a weak reference. This allows it to be garbage collected even if the Dataset is held in memory. This is ok, because the broadcast object can always be re-read. ## How was this patch tested? Tested in Spark shell by taking a heap dump, full repro steps listed in https://issues.apache.org/jira/browse/SPARK-25998. Closes #22995 from bkrieger/bk/torrent-broadcast-weak. 
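As background, here is a minimal standalone sketch (illustrative only, not code from this patch) of the soft-reference memoization pattern that the new `getValue()` adopts: the cached value can be reclaimed under memory pressure and is transparently recomputed on the next access.

```scala
import java.lang.ref.SoftReference

// Memoize a value behind a SoftReference: reuse it while the JVM keeps it,
// recompute it if the garbage collector has reclaimed it.
class SoftMemo[T <: AnyRef](compute: () => T) {
  private[this] var ref: SoftReference[T] = _

  def get: T = synchronized {
    val cached = if (ref == null) null.asInstanceOf[T] else ref.get()
    if (cached != null) {
      cached
    } else {
      val value = compute() // in TorrentBroadcast this is readBroadcastBlock()
      ref = new SoftReference[T](value)
      value
    }
  }
}

object SoftMemoExample {
  def main(args: Array[String]): Unit = {
    val memo = new SoftMemo(() => { println("recomputing"); Seq.fill(3)("x") })
    println(memo.get) // prints "recomputing", then List(x, x, x)
    println(memo.get) // served from the soft reference unless it was collected in between
  }
}
```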
Authored-by: Brandon Krieger Signed-off-by: Sean Owen --- .../spark/broadcast/TorrentBroadcast.scala | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index cbd49e070f2e..26ead57316e1 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -18,6 +18,7 @@ package org.apache.spark.broadcast import java.io._ +import java.lang.ref.SoftReference import java.nio.ByteBuffer import java.util.zip.Adler32 @@ -61,9 +62,11 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) * Value of the broadcast object on executors. This is reconstructed by [[readBroadcastBlock]], * which builds this value by reading blocks from the driver and/or other executors. * - * On the driver, if the value is required, it is read lazily from the block manager. + * On the driver, if the value is required, it is read lazily from the block manager. We hold + * a soft reference so that it can be garbage collected if required, as we can always reconstruct + * in the future. */ - @transient private lazy val _value: T = readBroadcastBlock() + @transient private var _value: SoftReference[T] = _ /** The compression codec to use, or None if compression is disabled */ @transient private var compressionCodec: Option[CompressionCodec] = _ @@ -92,8 +95,15 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) /** The checksum for all the blocks. */ private var checksums: Array[Int] = _ - override protected def getValue() = { - _value + override protected def getValue() = synchronized { + val memoized: T = if (_value == null) null.asInstanceOf[T] else _value.get + if (memoized != null) { + memoized + } else { + val newlyRead = readBroadcastBlock() + _value = new SoftReference[T](newlyRead) + newlyRead + } } private def calcChecksum(block: ByteBuffer): Int = { @@ -205,8 +215,8 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) } private def readBroadcastBlock(): T = Utils.tryOrIOException { - TorrentBroadcast.synchronized { - val broadcastCache = SparkEnv.get.broadcastManager.cachedValues + val broadcastCache = SparkEnv.get.broadcastManager.cachedValues + broadcastCache.synchronized { Option(broadcastCache.get(broadcastId)).map(_.asInstanceOf[T]).getOrElse { setConf(SparkEnv.get.conf) From 9fde3deab87c8f9c6d8dd147f5d52d243ff4b7ad Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Wed, 28 Nov 2018 07:33:34 -0800 Subject: [PATCH 0010/1072] [SPARK-25989][ML] OneVsRestModel handle empty outputCols incorrectly ## What changes were proposed in this pull request? ignore empty output columns ## How was this patch tested? added tests Closes #22991 from zhengruifeng/ovrm_empty_outcol. 
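A usage sketch of the intended behaviour, assuming a hypothetical `training` DataFrame with "label" and "features" columns (names not from this patch): clearing both output columns now makes `transform()` a no-op that returns the input columns unchanged, instead of handling the empty column names incorrectly.

```scala
import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest}

val ovr = new OneVsRest().setClassifier(new LogisticRegression().setMaxIter(1))
val model = ovr.fit(training)

// With both output columns cleared, transform() simply passes the input through.
val out = model.setPredictionCol("").setRawPredictionCol("").transform(training)
assert(out.schema.fieldNames.toSet == Set("label", "features"))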
Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../spark/ml/classification/OneVsRest.scala | 35 ++++++++++++------- .../ml/classification/OneVsRestSuite.scala | 26 ++++++++++++++ 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index 1835a91775e0..2f42a5922054 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -37,7 +37,7 @@ import org.apache.spark.ml.param.{Param, ParamMap, ParamPair, Params} import org.apache.spark.ml.param.shared.{HasParallelism, HasWeightCol} import org.apache.spark.ml.util._ import org.apache.spark.ml.util.Instrumentation.instrumented -import org.apache.spark.sql.{DataFrame, Dataset, Row} +import org.apache.spark.sql.{Column, DataFrame, Dataset, Row} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel @@ -169,6 +169,12 @@ final class OneVsRestModel private[ml] ( // Check schema transformSchema(dataset.schema, logging = true) + if (getPredictionCol == "" && getRawPredictionCol == "") { + logWarning(s"$uid: OneVsRestModel.transform() was called as NOOP" + + " since no output columns were set.") + return dataset.toDF + } + // determine the input columns: these need to be passed through val origCols = dataset.schema.map(f => col(f.name)) @@ -209,6 +215,9 @@ final class OneVsRestModel private[ml] ( newDataset.unpersist() } + var predictionColNames = Seq.empty[String] + var predictionColumns = Seq.empty[Column] + if (getRawPredictionCol != "") { val numClass = models.length @@ -219,24 +228,24 @@ final class OneVsRestModel private[ml] ( Vectors.dense(predArray) } - // output the index of the classifier with highest confidence as prediction - val labelUDF = udf { (rawPredictions: Vector) => rawPredictions.argmax.toDouble } + predictionColNames = predictionColNames :+ getRawPredictionCol + predictionColumns = predictionColumns :+ rawPredictionUDF(col(accColName)) + } - // output confidence as raw prediction, label and label metadata as prediction - aggregatedDataset - .withColumn(getRawPredictionCol, rawPredictionUDF(col(accColName))) - .withColumn(getPredictionCol, labelUDF(col(getRawPredictionCol)), labelMetadata) - .drop(accColName) - } else { + if (getPredictionCol != "") { // output the index of the classifier with highest confidence as prediction val labelUDF = udf { (predictions: Map[Int, Double]) => predictions.maxBy(_._2)._1.toDouble } - // output label and label metadata as prediction - aggregatedDataset - .withColumn(getPredictionCol, labelUDF(col(accColName)), labelMetadata) - .drop(accColName) + + predictionColNames = predictionColNames :+ getPredictionCol + predictionColumns = predictionColumns :+ labelUDF(col(accColName)) + .as(getPredictionCol, labelMetadata) } + + aggregatedDataset + .withColumns(predictionColNames, predictionColumns) + .drop(accColName) } @Since("1.4.1") diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala index 519ec1720eb9..b6e8c927403a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala @@ -290,6 +290,32 @@ class OneVsRestSuite extends MLTest with 
DefaultReadWriteTest { checkModelData(ovaModel, newOvaModel) } + test("should ignore empty output cols") { + val lr = new LogisticRegression().setMaxIter(1) + val ovr = new OneVsRest().setClassifier(lr) + val ovrModel = ovr.fit(dataset) + + val output1 = ovrModel.setPredictionCol("").setRawPredictionCol("") + .transform(dataset) + assert(output1.schema.fieldNames.toSet === + Set("label", "features")) + + val output2 = ovrModel.setPredictionCol("prediction").setRawPredictionCol("") + .transform(dataset) + assert(output2.schema.fieldNames.toSet === + Set("label", "features", "prediction")) + + val output3 = ovrModel.setPredictionCol("").setRawPredictionCol("rawPrediction") + .transform(dataset) + assert(output3.schema.fieldNames.toSet === + Set("label", "features", "rawPrediction")) + + val output4 = ovrModel.setPredictionCol("prediction").setRawPredictionCol("rawPrediction") + .transform(dataset) + assert(output4.schema.fieldNames.toSet === + Set("label", "features", "prediction", "rawPrediction")) + } + test("should support all NumericType labels and not support other types") { val ovr = new OneVsRest().setClassifier(new LogisticRegression().setMaxIter(1)) MLTestingUtils.checkNumericTypes[OneVsRestModel, OneVsRest]( From fa0d4bf69929c5acd676d602e758a969713d19d8 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 28 Nov 2018 23:42:13 +0800 Subject: [PATCH 0011/1072] [SPARK-25829][SQL] remove duplicated map keys with last wins policy ## What changes were proposed in this pull request? Currently duplicated map keys are not handled consistently. For example, map look up respects the duplicated key appears first, `Dataset.collect` only keeps the duplicated key appears last, `MapKeys` returns duplicated keys, etc. This PR proposes to remove duplicated map keys with last wins policy, to follow Java/Scala and Presto. It only applies to built-in functions, as users can create map with duplicated map keys via private APIs anyway. updated functions: `CreateMap`, `MapFromArrays`, `MapFromEntries`, `StringToMap`, `MapConcat`, `TransformKeys`. For other places: 1. data source v1 doesn't have this problem, as users need to provide a java/scala map, which can't have duplicated keys. 2. data source v2 may have this problem. I've added a note to `ArrayBasedMapData` to ask the caller to take care of duplicated keys. In the future we should enforce it in the stable data APIs for data source v2. 3. UDF doesn't have this problem, as users need to provide a java/scala map. Same as data source v1. 4. file format. I checked all of them and only parquet does not enforce it. For backward compatibility reasons I change nothing but leave a note saying that the behavior will be undefined if users write map with duplicated keys to parquet files. Maybe we can add a config and fail by default if parquet files have map with duplicated keys. This can be done in followup. ## How was this patch tested? updated tests and new tests Closes #23124 from cloud-fan/map. 
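The behaviour change is easiest to see with `map_concat`; the sketch below mirrors the updated PySpark docstring in this patch, translated to Scala (the `spark` session is assumed to exist):

```scala
import org.apache.spark.sql.functions.{col, map_concat}

val df = spark.sql("SELECT map(1, 'a', 2, 'b') AS map1, map(3, 'c', 1, 'd') AS map2")
df.select(map_concat(col("map1"), col("map2")).alias("map3")).show(false)
// Spark 2.4:  [1 -> a, 2 -> b, 3 -> c, 1 -> d]   (duplicated key 1 is kept)
// Spark 3.0:  [1 -> d, 2 -> b, 3 -> c]           (the last value for key 1 wins)
```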
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- docs/sql-migration-guide-upgrade.md | 2 + .../spark/sql/avro/AvroDeserializer.scala | 4 +- python/pyspark/sql/functions.py | 10 +- .../catalyst/expressions/UnsafeMapData.java | 3 + .../sql/catalyst/CatalystTypeConverters.scala | 6 - .../spark/sql/catalyst/InternalRow.scala | 48 ++-- .../catalyst/expressions/BoundAttribute.scala | 8 +- .../expressions/collectionOperations.scala | 242 ++++-------------- .../expressions/complexTypeCreator.scala | 106 +++----- .../expressions/higherOrderFunctions.scala | 8 +- .../expressions/objects/objects.scala | 8 +- .../sql/catalyst/json/JacksonParser.scala | 2 + .../catalyst/util/ArrayBasedMapBuilder.scala | 120 +++++++++ .../sql/catalyst/util/ArrayBasedMapData.scala | 15 ++ .../spark/sql/catalyst/util/ArrayData.scala | 18 +- .../CollectionExpressionsSuite.scala | 87 +++---- .../expressions/ComplexTypeSuite.scala | 20 +- .../HigherOrderFunctionsSuite.scala | 37 +-- .../util/ArrayBasedMapBuilderSuite.scala | 105 ++++++++ .../datasources/orc/OrcDeserializer.scala | 2 + .../parquet/ParquetRowConverter.scala | 6 +- .../spark/sql/DataFrameFunctionsSuite.scala | 6 +- 22 files changed, 444 insertions(+), 419 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilderSuite.scala diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 25cd54119091..55838e773e4b 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -27,6 +27,8 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, float/double -0.0 is semantically equal to 0.0, but users can still distinguish them via `Dataset.show`, `Dataset.collect` etc. Since Spark 3.0, float/double -0.0 is replaced by 0.0 internally, and users can't distinguish them any more. + - In Spark version 2.4 and earlier, users can create a map with duplicated keys via built-in functions like `CreateMap`, `StringToMap`, etc. The behavior of map with duplicated keys is undefined, e.g. map look-up respects the duplicated key that appears first, `Dataset.collect` only keeps the duplicated key that appears last, `MapKeys` returns duplicated keys, etc. Since Spark 3.0, these built-in functions will remove duplicated map keys with last wins policy. Users may still read map values with duplicated keys from data sources which do not enforce it (e.g. Parquet), in which case the behavior is undefined. + ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some change in behavior and are illustrated in the table below.
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala index 272e7d5b388d..4e2224b058a0 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.avro -import java.math.{BigDecimal} +import java.math.BigDecimal import java.nio.ByteBuffer import scala.collection.JavaConverters._ @@ -218,6 +218,8 @@ class AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) { i += 1 } + // The Avro map will never have null or duplicated map keys, it's safe to create a + // ArrayBasedMapData directly here. updater.set(ordinal, new ArrayBasedMapData(keyArray, valueArray)) case (UNION, _) => diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 286ef219a69e..f98e550e39da 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2656,11 +2656,11 @@ def map_concat(*cols): >>> from pyspark.sql.functions import map_concat >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as map1, map(3, 'c', 1, 'd') as map2") >>> df.select(map_concat("map1", "map2").alias("map3")).show(truncate=False) - +--------------------------------+ - |map3 | - +--------------------------------+ - |[1 -> a, 2 -> b, 3 -> c, 1 -> d]| - +--------------------------------+ + +------------------------+ + |map3 | + +------------------------+ + |[1 -> d, 2 -> b, 3 -> c]| + +------------------------+ """ sc = SparkContext._active_spark_context if len(cols) == 1 and isinstance(cols[0], (list, set)): diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java index f17441dfccb6..a0833a6df8bb 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java @@ -28,6 +28,9 @@ * Currently we just use 2 UnsafeArrayData to represent UnsafeMapData, with extra 8 bytes at head * to indicate the number of bytes of the unsafe key array. * [unsafe key array numBytes] [unsafe key array] [unsafe value array] + * + * Note that, user is responsible to guarantee that the key array does not have duplicated + * elements, otherwise the behavior is undefined. */ // TODO: Use a more efficient format which doesn't depend on unsafe array. 
public final class UnsafeMapData extends MapData { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index 6f5fbdd79e66..93df73ab1eaf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -431,12 +431,6 @@ object CatalystTypeConverters { map, (key: Any) => convertToCatalyst(key), (value: Any) => convertToCatalyst(value)) - case (keys: Array[_], values: Array[_]) => - // case for mapdata with duplicate keys - new ArrayBasedMapData( - new GenericArrayData(keys.map(convertToCatalyst)), - new GenericArrayData(values.map(convertToCatalyst)) - ) case other => other } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala index 274d75e680f0..e49c10be6be4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala @@ -125,22 +125,36 @@ object InternalRow { * actually takes a `SpecializedGetters` input because it can be generalized to other classes * that implements `SpecializedGetters` (e.g., `ArrayData`) too. */ - def getAccessor(dataType: DataType): (SpecializedGetters, Int) => Any = dataType match { - case BooleanType => (input, ordinal) => input.getBoolean(ordinal) - case ByteType => (input, ordinal) => input.getByte(ordinal) - case ShortType => (input, ordinal) => input.getShort(ordinal) - case IntegerType | DateType => (input, ordinal) => input.getInt(ordinal) - case LongType | TimestampType => (input, ordinal) => input.getLong(ordinal) - case FloatType => (input, ordinal) => input.getFloat(ordinal) - case DoubleType => (input, ordinal) => input.getDouble(ordinal) - case StringType => (input, ordinal) => input.getUTF8String(ordinal) - case BinaryType => (input, ordinal) => input.getBinary(ordinal) - case CalendarIntervalType => (input, ordinal) => input.getInterval(ordinal) - case t: DecimalType => (input, ordinal) => input.getDecimal(ordinal, t.precision, t.scale) - case t: StructType => (input, ordinal) => input.getStruct(ordinal, t.size) - case _: ArrayType => (input, ordinal) => input.getArray(ordinal) - case _: MapType => (input, ordinal) => input.getMap(ordinal) - case u: UserDefinedType[_] => getAccessor(u.sqlType) - case _ => (input, ordinal) => input.get(ordinal, dataType) + def getAccessor(dt: DataType, nullable: Boolean = true): (SpecializedGetters, Int) => Any = { + val getValueNullSafe: (SpecializedGetters, Int) => Any = dt match { + case BooleanType => (input, ordinal) => input.getBoolean(ordinal) + case ByteType => (input, ordinal) => input.getByte(ordinal) + case ShortType => (input, ordinal) => input.getShort(ordinal) + case IntegerType | DateType => (input, ordinal) => input.getInt(ordinal) + case LongType | TimestampType => (input, ordinal) => input.getLong(ordinal) + case FloatType => (input, ordinal) => input.getFloat(ordinal) + case DoubleType => (input, ordinal) => input.getDouble(ordinal) + case StringType => (input, ordinal) => input.getUTF8String(ordinal) + case BinaryType => (input, ordinal) => input.getBinary(ordinal) + case CalendarIntervalType => (input, ordinal) => input.getInterval(ordinal) + case t: DecimalType => (input, ordinal) => input.getDecimal(ordinal, 
t.precision, t.scale) + case t: StructType => (input, ordinal) => input.getStruct(ordinal, t.size) + case _: ArrayType => (input, ordinal) => input.getArray(ordinal) + case _: MapType => (input, ordinal) => input.getMap(ordinal) + case u: UserDefinedType[_] => getAccessor(u.sqlType, nullable) + case _ => (input, ordinal) => input.get(ordinal, dt) + } + + if (nullable) { + (getter, index) => { + if (getter.isNullAt(index)) { + null + } else { + getValueNullSafe(getter, index) + } + } + } else { + getValueNullSafe + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala index 77582e10f9ff..ea8c369ee49e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala @@ -34,15 +34,11 @@ case class BoundReference(ordinal: Int, dataType: DataType, nullable: Boolean) override def toString: String = s"input[$ordinal, ${dataType.simpleString}, $nullable]" - private val accessor: (InternalRow, Int) => Any = InternalRow.getAccessor(dataType) + private val accessor: (InternalRow, Int) => Any = InternalRow.getAccessor(dataType, nullable) // Use special getter for primitive types (for UnsafeRow) override def eval(input: InternalRow): Any = { - if (nullable && input.isNullAt(ordinal)) { - null - } else { - accessor(input, ordinal) - } + accessor(input, ordinal) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 43116743e995..fa8e38acd522 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -546,33 +546,25 @@ case class MapConcat(children: Seq[Expression]) extends ComplexTypeMergingExpres override def nullable: Boolean = children.exists(_.nullable) + private lazy val mapBuilder = new ArrayBasedMapBuilder(dataType.keyType, dataType.valueType) + override def eval(input: InternalRow): Any = { - val maps = children.map(_.eval(input)) + val maps = children.map(_.eval(input).asInstanceOf[MapData]) if (maps.contains(null)) { return null } - val keyArrayDatas = maps.map(_.asInstanceOf[MapData].keyArray()) - val valueArrayDatas = maps.map(_.asInstanceOf[MapData].valueArray()) - val numElements = keyArrayDatas.foldLeft(0L)((sum, ad) => sum + ad.numElements()) + val numElements = maps.foldLeft(0L)((sum, ad) => sum + ad.numElements()) if (numElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { throw new RuntimeException(s"Unsuccessful attempt to concat maps with $numElements " + s"elements due to exceeding the map size limit " + s"${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.") } - val finalKeyArray = new Array[AnyRef](numElements.toInt) - val finalValueArray = new Array[AnyRef](numElements.toInt) - var position = 0 - for (i <- keyArrayDatas.indices) { - val keyArray = keyArrayDatas(i).toObjectArray(dataType.keyType) - val valueArray = valueArrayDatas(i).toObjectArray(dataType.valueType) - Array.copy(keyArray, 0, finalKeyArray, position, keyArray.length) - Array.copy(valueArray, 0, finalValueArray, position, valueArray.length) - position += 
keyArray.length - } - new ArrayBasedMapData(new GenericArrayData(finalKeyArray), - new GenericArrayData(finalValueArray)) + for (map <- maps) { + mapBuilder.putAll(map.keyArray(), map.valueArray()) + } + mapBuilder.build() } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -581,16 +573,7 @@ case class MapConcat(children: Seq[Expression]) extends ComplexTypeMergingExpres val valueType = dataType.valueType val argsName = ctx.freshName("args") val hasNullName = ctx.freshName("hasNull") - val mapDataClass = classOf[MapData].getName - val arrayBasedMapDataClass = classOf[ArrayBasedMapData].getName - val arrayDataClass = classOf[ArrayData].getName - - val init = - s""" - |$mapDataClass[] $argsName = new $mapDataClass[${mapCodes.size}]; - |boolean ${ev.isNull}, $hasNullName = false; - |$mapDataClass ${ev.value} = null; - """.stripMargin + val builderTerm = ctx.addReferenceObj("mapBuilder", mapBuilder) val assignments = mapCodes.zip(children.map(_.nullable)).zipWithIndex.map { case ((m, true), i) => @@ -613,10 +596,10 @@ case class MapConcat(children: Seq[Expression]) extends ComplexTypeMergingExpres """.stripMargin } - val codes = ctx.splitExpressionsWithCurrentInputs( + val prepareMaps = ctx.splitExpressionsWithCurrentInputs( expressions = assignments, funcName = "getMapConcatInputs", - extraArguments = (s"$mapDataClass[]", argsName) :: ("boolean", hasNullName) :: Nil, + extraArguments = (s"MapData[]", argsName) :: ("boolean", hasNullName) :: Nil, returnType = "boolean", makeSplitFunction = body => s""" @@ -646,34 +629,34 @@ case class MapConcat(children: Seq[Expression]) extends ComplexTypeMergingExpres val mapMerge = s""" - |${ev.isNull} = $hasNullName; - |if (!${ev.isNull}) { - | $arrayDataClass[] $keyArgsName = new $arrayDataClass[${mapCodes.size}]; - | $arrayDataClass[] $valArgsName = new $arrayDataClass[${mapCodes.size}]; - | long $numElementsName = 0; - | for (int $idxName = 0; $idxName < $argsName.length; $idxName++) { - | $keyArgsName[$idxName] = $argsName[$idxName].keyArray(); - | $valArgsName[$idxName] = $argsName[$idxName].valueArray(); - | $numElementsName += $argsName[$idxName].numElements(); - | } - | if ($numElementsName > ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}) { - | throw new RuntimeException("Unsuccessful attempt to concat maps with " + - | $numElementsName + " elements due to exceeding the map size limit " + - | "${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}."); - | } - | $arrayDataClass $finKeysName = $keyConcat($keyArgsName, - | (int) $numElementsName); - | $arrayDataClass $finValsName = $valueConcat($valArgsName, - | (int) $numElementsName); - | ${ev.value} = new $arrayBasedMapDataClass($finKeysName, $finValsName); + |ArrayData[] $keyArgsName = new ArrayData[${mapCodes.size}]; + |ArrayData[] $valArgsName = new ArrayData[${mapCodes.size}]; + |long $numElementsName = 0; + |for (int $idxName = 0; $idxName < $argsName.length; $idxName++) { + | $keyArgsName[$idxName] = $argsName[$idxName].keyArray(); + | $valArgsName[$idxName] = $argsName[$idxName].valueArray(); + | $numElementsName += $argsName[$idxName].numElements(); |} + |if ($numElementsName > ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}) { + | throw new RuntimeException("Unsuccessful attempt to concat maps with " + + | $numElementsName + " elements due to exceeding the map size limit " + + | "${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}."); + |} + |ArrayData $finKeysName = $keyConcat($keyArgsName, (int) $numElementsName); + |ArrayData $finValsName = $valueConcat($valArgsName, (int) 
$numElementsName); + |${ev.value} = $builderTerm.from($finKeysName, $finValsName); """.stripMargin ev.copy( code = code""" - |$init - |$codes - |$mapMerge + |MapData[] $argsName = new MapData[${mapCodes.size}]; + |boolean $hasNullName = false; + |$prepareMaps + |boolean ${ev.isNull} = $hasNullName; + |MapData ${ev.value} = null; + |if (!$hasNullName) { + | $mapMerge + |} """.stripMargin) } @@ -751,171 +734,44 @@ case class MapFromEntries(child: Expression) extends UnaryExpression { s"${child.dataType.catalogString} type. $prettyName accepts only arrays of pair structs.") } + private lazy val mapBuilder = new ArrayBasedMapBuilder(dataType.keyType, dataType.valueType) + override protected def nullSafeEval(input: Any): Any = { - val arrayData = input.asInstanceOf[ArrayData] - val numEntries = arrayData.numElements() + val entries = input.asInstanceOf[ArrayData] + val numEntries = entries.numElements() var i = 0 - if(nullEntries) { + if (nullEntries) { while (i < numEntries) { - if (arrayData.isNullAt(i)) return null + if (entries.isNullAt(i)) return null i += 1 } } - val keyArray = new Array[AnyRef](numEntries) - val valueArray = new Array[AnyRef](numEntries) + i = 0 while (i < numEntries) { - val entry = arrayData.getStruct(i, 2) - val key = entry.get(0, dataType.keyType) - if (key == null) { - throw new RuntimeException("The first field from a struct (key) can't be null.") - } - keyArray.update(i, key) - val value = entry.get(1, dataType.valueType) - valueArray.update(i, value) + mapBuilder.put(entries.getStruct(i, 2)) i += 1 } - ArrayBasedMapData(keyArray, valueArray) + mapBuilder.build() } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, c => { val numEntries = ctx.freshName("numEntries") - val isKeyPrimitive = CodeGenerator.isPrimitiveType(dataType.keyType) - val isValuePrimitive = CodeGenerator.isPrimitiveType(dataType.valueType) - val code = if (isKeyPrimitive && isValuePrimitive) { - genCodeForPrimitiveElements(ctx, c, ev.value, numEntries) - } else { - genCodeForAnyElements(ctx, c, ev.value, numEntries) - } + val builderTerm = ctx.addReferenceObj("mapBuilder", mapBuilder) + val i = ctx.freshName("idx") ctx.nullArrayElementsSaveExec(nullEntries, ev.isNull, c) { s""" |final int $numEntries = $c.numElements(); - |$code + |for (int $i = 0; $i < $numEntries; $i++) { + | $builderTerm.put($c.getStruct($i, 2)); + |} + |${ev.value} = $builderTerm.build(); """.stripMargin } }) } - private def genCodeForAssignmentLoop( - ctx: CodegenContext, - childVariable: String, - mapData: String, - numEntries: String, - keyAssignment: (String, String) => String, - valueAssignment: (String, String) => String): String = { - val entry = ctx.freshName("entry") - val i = ctx.freshName("idx") - - val nullKeyCheck = if (dataTypeDetails.get._2) { - s""" - |if ($entry.isNullAt(0)) { - | throw new RuntimeException("The first field from a struct (key) can't be null."); - |} - """.stripMargin - } else { - "" - } - - s""" - |for (int $i = 0; $i < $numEntries; $i++) { - | InternalRow $entry = $childVariable.getStruct($i, 2); - | $nullKeyCheck - | ${keyAssignment(CodeGenerator.getValue(entry, dataType.keyType, "0"), i)} - | ${valueAssignment(entry, i)} - |} - """.stripMargin - } - - private def genCodeForPrimitiveElements( - ctx: CodegenContext, - childVariable: String, - mapData: String, - numEntries: String): String = { - val byteArraySize = ctx.freshName("byteArraySize") - val keySectionSize = ctx.freshName("keySectionSize") - val valueSectionSize = 
ctx.freshName("valueSectionSize") - val data = ctx.freshName("byteArray") - val unsafeMapData = ctx.freshName("unsafeMapData") - val keyArrayData = ctx.freshName("keyArrayData") - val valueArrayData = ctx.freshName("valueArrayData") - - val baseOffset = Platform.BYTE_ARRAY_OFFSET - val keySize = dataType.keyType.defaultSize - val valueSize = dataType.valueType.defaultSize - val kByteSize = s"UnsafeArrayData.calculateSizeOfUnderlyingByteArray($numEntries, $keySize)" - val vByteSize = s"UnsafeArrayData.calculateSizeOfUnderlyingByteArray($numEntries, $valueSize)" - - val keyAssignment = (key: String, idx: String) => - CodeGenerator.setArrayElement(keyArrayData, dataType.keyType, idx, key) - val valueAssignment = (entry: String, idx: String) => - CodeGenerator.createArrayAssignment( - valueArrayData, dataType.valueType, entry, idx, "1", dataType.valueContainsNull) - val assignmentLoop = genCodeForAssignmentLoop( - ctx, - childVariable, - mapData, - numEntries, - keyAssignment, - valueAssignment - ) - - s""" - |final long $keySectionSize = $kByteSize; - |final long $valueSectionSize = $vByteSize; - |final long $byteArraySize = 8 + $keySectionSize + $valueSectionSize; - |if ($byteArraySize > ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}) { - | ${genCodeForAnyElements(ctx, childVariable, mapData, numEntries)} - |} else { - | final byte[] $data = new byte[(int)$byteArraySize]; - | UnsafeMapData $unsafeMapData = new UnsafeMapData(); - | Platform.putLong($data, $baseOffset, $keySectionSize); - | Platform.putLong($data, ${baseOffset + 8}, $numEntries); - | Platform.putLong($data, ${baseOffset + 8} + $keySectionSize, $numEntries); - | $unsafeMapData.pointTo($data, $baseOffset, (int)$byteArraySize); - | ArrayData $keyArrayData = $unsafeMapData.keyArray(); - | ArrayData $valueArrayData = $unsafeMapData.valueArray(); - | $assignmentLoop - | $mapData = $unsafeMapData; - |} - """.stripMargin - } - - private def genCodeForAnyElements( - ctx: CodegenContext, - childVariable: String, - mapData: String, - numEntries: String): String = { - val keys = ctx.freshName("keys") - val values = ctx.freshName("values") - val mapDataClass = classOf[ArrayBasedMapData].getName() - - val isValuePrimitive = CodeGenerator.isPrimitiveType(dataType.valueType) - val valueAssignment = (entry: String, idx: String) => { - val value = CodeGenerator.getValue(entry, dataType.valueType, "1") - if (dataType.valueContainsNull && isValuePrimitive) { - s"$values[$idx] = $entry.isNullAt(1) ? 
null : (Object)$value;" - } else { - s"$values[$idx] = $value;" - } - } - val keyAssignment = (key: String, idx: String) => s"$keys[$idx] = $key;" - val assignmentLoop = genCodeForAssignmentLoop( - ctx, - childVariable, - mapData, - numEntries, - keyAssignment, - valueAssignment) - - s""" - |final Object[] $keys = new Object[$numEntries]; - |final Object[] $values = new Object[$numEntries]; - |$assignmentLoop - |$mapData = $mapDataClass.apply($keys, $values); - """.stripMargin - } - override def prettyName: String = "map_from_entries" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 6b77996789f1..4e722c9237a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -24,8 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.Platform -import org.apache.spark.unsafe.array.ByteArrayMethods import org.apache.spark.unsafe.types.UTF8String /** @@ -62,7 +60,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val et = dataType.elementType val (allocation, assigns, arrayData) = - GenArrayData.genCodeToCreateArrayData(ctx, et, children, false, "createArray") + GenArrayData.genCodeToCreateArrayData(ctx, et, children, "createArray") ev.copy( code = code"${allocation}${assigns}", value = JavaCode.variable(arrayData, dataType), @@ -79,7 +77,6 @@ private [sql] object GenArrayData { * @param ctx a [[CodegenContext]] * @param elementType data type of underlying array elements * @param elementsExpr concatenated set of [[Expression]] for each element of an underlying array - * @param isMapKey if true, throw an exception when the element is null * @param functionName string to include in the error message * @return (array allocation, concatenated assignments to each array elements, arrayData name) */ @@ -87,7 +84,6 @@ private [sql] object GenArrayData { ctx: CodegenContext, elementType: DataType, elementsExpr: Seq[Expression], - isMapKey: Boolean, functionName: String): (String, String, String) = { val arrayDataName = ctx.freshName("arrayData") val numElements = s"${elementsExpr.length}L" @@ -103,15 +99,9 @@ private [sql] object GenArrayData { val assignment = if (!expr.nullable) { setArrayElement } else { - val isNullAssignment = if (!isMapKey) { - s"$arrayDataName.setNullAt($i);" - } else { - "throw new RuntimeException(\"Cannot use null as map key!\");" - } - s""" |if (${eval.isNull}) { - | $isNullAssignment + | $arrayDataName.setNullAt($i); |} else { | $setArrayElement |} @@ -165,7 +155,7 @@ case class CreateMap(children: Seq[Expression]) extends Expression { } } - override def dataType: MapType = { + override lazy val dataType: MapType = { MapType( keyType = TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(keys.map(_.dataType)) .getOrElse(StringType), @@ -176,32 +166,33 @@ case class CreateMap(children: Seq[Expression]) extends Expression { override def nullable: Boolean = false + private lazy val mapBuilder = new ArrayBasedMapBuilder(dataType.keyType, dataType.valueType) + override def eval(input: 
InternalRow): Any = { - val keyArray = keys.map(_.eval(input)).toArray - if (keyArray.contains(null)) { - throw new RuntimeException("Cannot use null as map key!") + var i = 0 + while (i < keys.length) { + mapBuilder.put(keys(i).eval(input), values(i).eval(input)) + i += 1 } - val valueArray = values.map(_.eval(input)).toArray - new ArrayBasedMapData(new GenericArrayData(keyArray), new GenericArrayData(valueArray)) + mapBuilder.build() } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val mapClass = classOf[ArrayBasedMapData].getName val MapType(keyDt, valueDt, _) = dataType val (allocationKeyData, assignKeys, keyArrayData) = - GenArrayData.genCodeToCreateArrayData(ctx, keyDt, keys, true, "createMap") + GenArrayData.genCodeToCreateArrayData(ctx, keyDt, keys, "createMap") val (allocationValueData, assignValues, valueArrayData) = - GenArrayData.genCodeToCreateArrayData(ctx, valueDt, values, false, "createMap") + GenArrayData.genCodeToCreateArrayData(ctx, valueDt, values, "createMap") + val builderTerm = ctx.addReferenceObj("mapBuilder", mapBuilder) val code = code""" - final boolean ${ev.isNull} = false; $allocationKeyData $assignKeys $allocationValueData $assignValues - final MapData ${ev.value} = new $mapClass($keyArrayData, $valueArrayData); + final MapData ${ev.value} = $builderTerm.from($keyArrayData, $valueArrayData); """ - ev.copy(code = code) + ev.copy(code = code, isNull = FalseLiteral) } override def prettyName: String = "map" @@ -234,53 +225,25 @@ case class MapFromArrays(left: Expression, right: Expression) } } - override def dataType: DataType = { + override def dataType: MapType = { MapType( keyType = left.dataType.asInstanceOf[ArrayType].elementType, valueType = right.dataType.asInstanceOf[ArrayType].elementType, valueContainsNull = right.dataType.asInstanceOf[ArrayType].containsNull) } + private lazy val mapBuilder = new ArrayBasedMapBuilder(dataType.keyType, dataType.valueType) + override def nullSafeEval(keyArray: Any, valueArray: Any): Any = { val keyArrayData = keyArray.asInstanceOf[ArrayData] val valueArrayData = valueArray.asInstanceOf[ArrayData] - if (keyArrayData.numElements != valueArrayData.numElements) { - throw new RuntimeException("The given two arrays should have the same length") - } - val leftArrayType = left.dataType.asInstanceOf[ArrayType] - if (leftArrayType.containsNull) { - var i = 0 - while (i < keyArrayData.numElements) { - if (keyArrayData.isNullAt(i)) { - throw new RuntimeException("Cannot use null as map key!") - } - i += 1 - } - } - new ArrayBasedMapData(keyArrayData.copy(), valueArrayData.copy()) + mapBuilder.from(keyArrayData.copy(), valueArrayData.copy()) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, (keyArrayData, valueArrayData) => { - val arrayBasedMapData = classOf[ArrayBasedMapData].getName - val leftArrayType = left.dataType.asInstanceOf[ArrayType] - val keyArrayElemNullCheck = if (!leftArrayType.containsNull) "" else { - val i = ctx.freshName("i") - s""" - |for (int $i = 0; $i < $keyArrayData.numElements(); $i++) { - | if ($keyArrayData.isNullAt($i)) { - | throw new RuntimeException("Cannot use null as map key!"); - | } - |} - """.stripMargin - } - s""" - |if ($keyArrayData.numElements() != $valueArrayData.numElements()) { - | throw new RuntimeException("The given two arrays should have the same length"); - |} - |$keyArrayElemNullCheck - |${ev.value} = new $arrayBasedMapData($keyArrayData.copy(), $valueArrayData.copy()); - """.stripMargin + val builderTerm = 
ctx.addReferenceObj("mapBuilder", mapBuilder) + s"${ev.value} = $builderTerm.from($keyArrayData.copy(), $valueArrayData.copy());" }) } @@ -488,28 +451,25 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E } } + private lazy val mapBuilder = new ArrayBasedMapBuilder(StringType, StringType) + override def nullSafeEval( inputString: Any, stringDelimiter: Any, keyValueDelimiter: Any): Any = { val keyValues = inputString.asInstanceOf[UTF8String].split(stringDelimiter.asInstanceOf[UTF8String], -1) - - val iterator = new Iterator[(UTF8String, UTF8String)] { - var index = 0 - val keyValueDelimiterUTF8String = keyValueDelimiter.asInstanceOf[UTF8String] - - override def hasNext: Boolean = { - keyValues.length > index - } - - override def next(): (UTF8String, UTF8String) = { - val keyValueArray = keyValues(index).split(keyValueDelimiterUTF8String, 2) - index += 1 - (keyValueArray(0), if (keyValueArray.length < 2) null else keyValueArray(1)) - } + val keyValueDelimiterUTF8String = keyValueDelimiter.asInstanceOf[UTF8String] + + var i = 0 + while (i < keyValues.length) { + val keyValueArray = keyValues(i).split(keyValueDelimiterUTF8String, 2) + val key = keyValueArray(0) + val value = if (keyValueArray.length < 2) null else keyValueArray(1) + mapBuilder.put(key, value) + i += 1 } - ArrayBasedMapData(iterator, keyValues.size, identity, identity) + mapBuilder.build() } override def prettyName: String = "str_to_map" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 8b3102186622..a8639d29f964 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -512,7 +512,7 @@ case class TransformKeys( @transient lazy val MapType(keyType, valueType, valueContainsNull) = argument.dataType - override def dataType: DataType = MapType(function.dataType, valueType, valueContainsNull) + override def dataType: MapType = MapType(function.dataType, valueType, valueContainsNull) override def checkInputDataTypes(): TypeCheckResult = { TypeUtils.checkForMapKeyType(function.dataType) @@ -525,6 +525,7 @@ case class TransformKeys( @transient lazy val LambdaFunction( _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function + private lazy val mapBuilder = new ArrayBasedMapBuilder(dataType.keyType, dataType.valueType) override def nullSafeEval(inputRow: InternalRow, argumentValue: Any): Any = { val map = argumentValue.asInstanceOf[MapData] @@ -534,13 +535,10 @@ case class TransformKeys( keyVar.value.set(map.keyArray().get(i, keyVar.dataType)) valueVar.value.set(map.valueArray().get(i, valueVar.dataType)) val result = functionForEval.eval(inputRow) - if (result == null) { - throw new RuntimeException("Cannot use null as map key!") - } resultKeys.update(i, result) i += 1 } - new ArrayBasedMapData(resultKeys, map.valueArray()) + mapBuilder.from(resultKeys, map.valueArray()) } override def prettyName: String = "transform_keys" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 59c897b6a53c..8182730feb4b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala 
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -587,17 +587,13 @@ case class LambdaVariable( dataType: DataType, nullable: Boolean = true) extends LeafExpression with NonSQLExpression { - private val accessor: (InternalRow, Int) => Any = InternalRow.getAccessor(dataType) + private val accessor: (InternalRow, Int) => Any = InternalRow.getAccessor(dataType, nullable) // Interpreted execution of `LambdaVariable` always get the 0-index element from input row. override def eval(input: InternalRow): Any = { assert(input.numFields == 1, "The input row of interpreted LambdaVariable should have only 1 field.") - if (nullable && input.isNullAt(0)) { - null - } else { - accessor(input, 0) - } + accessor(input, 0) } override def genCode(ctx: CodegenContext): ExprCode = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 773ff5a7a401..92517aac053b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -367,6 +367,8 @@ class JacksonParser( values += fieldConverter.apply(parser) } + // The JSON map will never have null or duplicated map keys, it's safe to create a + // ArrayBasedMapData directly here. ArrayBasedMapData(keys.toArray, values.toArray) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala new file mode 100644 index 000000000000..e7cd61655dc9 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types._ + +/** + * A builder of [[ArrayBasedMapData]], which fails if a null map key is detected, and removes + * duplicated map keys w.r.t. the last wins policy. + */ +class ArrayBasedMapBuilder(keyType: DataType, valueType: DataType) extends Serializable { + assert(!keyType.existsRecursively(_.isInstanceOf[MapType]), "key of map cannot be/contain map") + assert(keyType != NullType, "map key cannot be null type.") + + private lazy val keyToIndex = keyType match { + // Binary type data is `byte[]`, which can't use `==` to check equality. 
+ case _: AtomicType | _: CalendarIntervalType if !keyType.isInstanceOf[BinaryType] => + new java.util.HashMap[Any, Int]() + case _ => + // for complex types, use interpreted ordering to be able to compare unsafe data with safe + // data, e.g. UnsafeRow vs GenericInternalRow. + new java.util.TreeMap[Any, Int](TypeUtils.getInterpretedOrdering(keyType)) + } + + // TODO: specialize it + private lazy val keys = mutable.ArrayBuffer.empty[Any] + private lazy val values = mutable.ArrayBuffer.empty[Any] + + private lazy val keyGetter = InternalRow.getAccessor(keyType) + private lazy val valueGetter = InternalRow.getAccessor(valueType) + + def put(key: Any, value: Any): Unit = { + if (key == null) { + throw new RuntimeException("Cannot use null as map key.") + } + + val index = keyToIndex.getOrDefault(key, -1) + if (index == -1) { + keyToIndex.put(key, values.length) + keys.append(key) + values.append(value) + } else { + // Overwrite the previous value, as the policy is last wins. + values(index) = value + } + } + + // write a 2-field row, the first field is key and the second field is value. + def put(entry: InternalRow): Unit = { + if (entry.isNullAt(0)) { + throw new RuntimeException("Cannot use null as map key.") + } + put(keyGetter(entry, 0), valueGetter(entry, 1)) + } + + def putAll(keyArray: ArrayData, valueArray: ArrayData): Unit = { + if (keyArray.numElements() != valueArray.numElements()) { + throw new RuntimeException( + "The key array and value array of MapData must have the same length.") + } + + var i = 0 + while (i < keyArray.numElements()) { + put(keyGetter(keyArray, i), valueGetter(valueArray, i)) + i += 1 + } + } + + private def reset(): Unit = { + keyToIndex.clear() + keys.clear() + values.clear() + } + + /** + * Builds the result [[ArrayBasedMapData]] and reset this builder to free up the resources. The + * builder becomes fresh afterward and is ready to take input and build another map. + */ + def build(): ArrayBasedMapData = { + val map = new ArrayBasedMapData( + new GenericArrayData(keys.toArray), new GenericArrayData(values.toArray)) + reset() + map + } + + /** + * Builds a [[ArrayBasedMapData]] from the given key and value array and reset this builder. The + * builder becomes fresh afterward and is ready to take input and build another map. + */ + def from(keyArray: ArrayData, valueArray: ArrayData): ArrayBasedMapData = { + assert(keyToIndex.isEmpty, "'from' can only be called with a fresh ArrayBasedMapBuilder.") + putAll(keyArray, valueArray) + if (keyToIndex.size == keyArray.numElements()) { + // If there is no duplicated map keys, creates the MapData with the input key and value array, + // as they might already in unsafe format and are more efficient. + reset() + new ArrayBasedMapData(keyArray, valueArray) + } else { + build() + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapData.scala index 91b313944369..0989af26b8c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapData.scala @@ -19,6 +19,12 @@ package org.apache.spark.sql.catalyst.util import java.util.{Map => JavaMap} +/** + * A simple `MapData` implementation which is backed by 2 arrays. + * + * Note that, user is responsible to guarantee that the key array does not have duplicated + * elements, otherwise the behavior is undefined. 
+ */ class ArrayBasedMapData(val keyArray: ArrayData, val valueArray: ArrayData) extends MapData { require(keyArray.numElements() == valueArray.numElements()) @@ -83,6 +89,9 @@ object ArrayBasedMapData { * Creates a [[ArrayBasedMapData]] by applying the given converters over * each (key -> value) pair from the given iterator * + * Note that, user is responsible to guarantee that the key array does not have duplicated + * elements, otherwise the behavior is undefined. + * * @param iterator Input iterator * @param size Number of elements * @param keyConverter This function is applied over all the keys extracted from the @@ -108,6 +117,12 @@ object ArrayBasedMapData { ArrayBasedMapData(keys, values) } + /** + * Creates a [[ArrayBasedMapData]] from a key and value array. + * + * Note that, user is responsible to guarantee that the key array does not have duplicated + * elements, otherwise the behavior is undefined. + */ def apply(keys: Array[_], values: Array[_]): ArrayBasedMapData = { new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala index 4da8ce05fe8a..ebbf241088f8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala @@ -172,11 +172,7 @@ abstract class ArrayData extends SpecializedGetters with Serializable { val values = new Array[T](size) var i = 0 while (i < size) { - if (isNullAt(i)) { - values(i) = null.asInstanceOf[T] - } else { - values(i) = accessor(this, i).asInstanceOf[T] - } + values(i) = accessor(this, i).asInstanceOf[T] i += 1 } values @@ -187,11 +183,7 @@ abstract class ArrayData extends SpecializedGetters with Serializable { val accessor = InternalRow.getAccessor(elementType) var i = 0 while (i < size) { - if (isNullAt(i)) { - f(i, null) - } else { - f(i, accessor(this, i)) - } + f(i, accessor(this, i)) i += 1 } } @@ -208,11 +200,7 @@ class ArrayDataIndexedSeq[T](arrayData: ArrayData, dataType: DataType) extends I override def apply(idx: Int): T = if (0 <= idx && idx < arrayData.numElements()) { - if (arrayData.isNullAt(idx)) { - null.asInstanceOf[T] - } else { - accessor(arrayData, idx).asInstanceOf[T] - } + accessor(arrayData, idx).asInstanceOf[T] } else { throw new IndexOutOfBoundsException( s"Index $idx must be between 0 and the length of the ArrayData.") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index d2edb2f24688..bed8547dbc83 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -114,13 +114,13 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper val m1 = Literal.create(create_map("c" -> "3", "a" -> "4"), MapType(StringType, StringType, valueContainsNull = false)) val m2 = Literal.create(create_map("d" -> "4", "e" -> "5"), MapType(StringType, StringType)) - val m3 = Literal.create(create_map("a" -> "1", "b" -> "2"), MapType(StringType, StringType)) + val m3 = Literal.create(create_map("f" -> "1", "g" -> "2"), MapType(StringType, StringType)) val m4 = 
Literal.create(create_map("a" -> null, "c" -> "3"), MapType(StringType, StringType)) val m5 = Literal.create(create_map("a" -> 1, "b" -> 2), MapType(StringType, IntegerType)) - val m6 = Literal.create(create_map("a" -> null, "c" -> 3), MapType(StringType, IntegerType)) + val m6 = Literal.create(create_map("c" -> null, "d" -> 3), MapType(StringType, IntegerType)) val m7 = Literal.create(create_map(List(1, 2) -> 1, List(3, 4) -> 2), MapType(ArrayType(IntegerType), IntegerType)) - val m8 = Literal.create(create_map(List(5, 6) -> 3, List(1, 2) -> 4), + val m8 = Literal.create(create_map(List(5, 6) -> 3, List(7, 8) -> 4), MapType(ArrayType(IntegerType), IntegerType)) val m9 = Literal.create(create_map(1 -> "1", 2 -> "2"), MapType(IntegerType, StringType, valueContainsNull = false)) @@ -134,57 +134,33 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper MapType(IntegerType, IntegerType, valueContainsNull = true)) val mNull = Literal.create(null, MapType(StringType, StringType)) - // overlapping maps - checkEvaluation(MapConcat(Seq(m0, m1)), - ( - Array("a", "b", "c", "a"), // keys - Array("1", "2", "3", "4") // values - ) - ) + // overlapping maps should remove duplicated map keys w.r.t. last win policy. + checkEvaluation(MapConcat(Seq(m0, m1)), create_map("a" -> "4", "b" -> "2", "c" -> "3")) // maps with no overlap checkEvaluation(MapConcat(Seq(m0, m2)), create_map("a" -> "1", "b" -> "2", "d" -> "4", "e" -> "5")) // 3 maps - checkEvaluation(MapConcat(Seq(m0, m1, m2)), - ( - Array("a", "b", "c", "a", "d", "e"), // keys - Array("1", "2", "3", "4", "4", "5") // values - ) - ) + checkEvaluation(MapConcat(Seq(m0, m2, m3)), + create_map("a" -> "1", "b" -> "2", "d" -> "4", "e" -> "5", "f" -> "1", "g" -> "2")) // null reference values - checkEvaluation(MapConcat(Seq(m3, m4)), - ( - Array("a", "b", "a", "c"), // keys - Array("1", "2", null, "3") // values - ) - ) + checkEvaluation(MapConcat(Seq(m2, m4)), + create_map("d" -> "4", "e" -> "5", "a" -> null, "c" -> "3")) // null primitive values checkEvaluation(MapConcat(Seq(m5, m6)), - ( - Array("a", "b", "a", "c"), // keys - Array(1, 2, null, 3) // values - ) - ) + create_map("a" -> 1, "b" -> 2, "c" -> null, "d" -> 3)) // keys that are primitive checkEvaluation(MapConcat(Seq(m9, m10)), - ( - Array(1, 2, 3, 4), // keys - Array("1", "2", "3", "4") // values - ) - ) + create_map(1 -> "1", 2 -> "2", 3 -> "3", 4 -> "4")) - // keys that are arrays, with overlap + // keys that are arrays checkEvaluation(MapConcat(Seq(m7, m8)), - ( - Array(List(1, 2), List(3, 4), List(5, 6), List(1, 2)), // keys - Array(1, 2, 3, 4) // values - ) - ) + create_map(List(1, 2) -> 1, List(3, 4) -> 2, List(5, 6) -> 3, List(7, 8) -> 4)) + // both keys and value are primitive and valueContainsNull = false checkEvaluation(MapConcat(Seq(m11, m12)), create_map(1 -> 2, 3 -> 4, 5 -> 6)) @@ -205,15 +181,14 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(MapConcat(Seq.empty), Map.empty) // force split expressions for input in generated code - val expectedKeys = Array.fill(65)(Seq("a", "b")).flatten ++ Array("d", "e") - val expectedValues = Array.fill(65)(Seq("1", "2")).flatten ++ Array("4", "5") - checkEvaluation(MapConcat( - Seq( - m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, - m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, - m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m0, m2 - )), - 
(expectedKeys, expectedValues)) + val expectedKeys = (1 to 65).map(_.toString) + val expectedValues = (1 to 65).map(_.toString) + checkEvaluation( + MapConcat( + expectedKeys.zip(expectedValues).map { + case (k, v) => Literal.create(create_map(k -> v), MapType(StringType, StringType)) + }), + create_map(expectedKeys.zip(expectedValues): _*)) // argument checking assert(MapConcat(Seq(m0, m1)).checkInputDataTypes().isSuccess) @@ -248,7 +223,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper ArrayType(IntegerType, containsNull = true), ArrayType(StringType, containsNull = true), valueContainsNull = true)) - checkEvaluation(mapConcat, Map( + checkEvaluation(mapConcat, create_map( Seq(1, 2) -> Seq("a", "b"), Seq(3, 4, null) -> Seq("c", "d", null), Seq(6) -> null)) @@ -282,7 +257,9 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper val ai1 = Literal.create(Seq(row(1, null), row(2, 20), row(3, null)), aiType) val ai2 = Literal.create(Seq.empty, aiType) val ai3 = Literal.create(null, aiType) + // The map key is duplicated val ai4 = Literal.create(Seq(row(1, 10), row(1, 20)), aiType) + // The map key is null val ai5 = Literal.create(Seq(row(1, 10), row(null, 20)), aiType) val ai6 = Literal.create(Seq(null, row(2, 20), null), aiType) @@ -290,10 +267,12 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(MapFromEntries(ai1), create_map(1 -> null, 2 -> 20, 3 -> null)) checkEvaluation(MapFromEntries(ai2), Map.empty) checkEvaluation(MapFromEntries(ai3), null) - checkEvaluation(MapKeys(MapFromEntries(ai4)), Seq(1, 1)) + // Duplicated map keys will be removed w.r.t. the last wins policy. + checkEvaluation(MapFromEntries(ai4), create_map(1 -> 20)) + // Map key can't be null checkExceptionInExpression[RuntimeException]( MapFromEntries(ai5), - "The first field from a struct (key) can't be null.") + "Cannot use null as map key") checkEvaluation(MapFromEntries(ai6), null) // Non-primitive-type keys and values @@ -310,13 +289,13 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(MapFromEntries(as1), create_map("a" -> null, "b" -> "bb", "c" -> null)) checkEvaluation(MapFromEntries(as2), Map.empty) checkEvaluation(MapFromEntries(as3), null) - checkEvaluation(MapKeys(MapFromEntries(as4)), Seq("a", "a")) - checkEvaluation(MapFromEntries(as6), null) - + // Duplicated map keys will be removed w.r.t. the last wins policy. + checkEvaluation(MapFromEntries(as4), create_map("a" -> "bb")) // Map key can't be null checkExceptionInExpression[RuntimeException]( MapFromEntries(as5), - "The first field from a struct (key) can't be null.") + "Cannot use null as map key") + checkEvaluation(MapFromEntries(as6), null) // map key can't be map val structOfMap = row(create_map(1 -> 1), 1) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index d95f42e04e37..dc6046481504 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -183,6 +183,11 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { CreateMap(interlace(strWithNull, intSeq.map(Literal(_)))), "Cannot use null as map key") + // Duplicated map keys will be removed w.r.t. the last wins policy. 
+ checkEvaluation( + CreateMap(Seq(Literal(1), Literal(2), Literal(1), Literal(3))), + create_map(1 -> 3)) + // ArrayType map key and value val map = CreateMap(Seq( Literal.create(intSeq, ArrayType(IntegerType, containsNull = false)), @@ -243,12 +248,18 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { MapFromArrays(intWithNullArray, strArray), "Cannot use null as map key") + // Duplicated map keys will be removed w.r.t. the last wins policy. + checkEvaluation( + MapFromArrays( + Literal.create(Seq(1, 1), ArrayType(IntegerType)), + Literal.create(Seq(2, 3), ArrayType(IntegerType))), + create_map(1 -> 3)) + // map key can't be map val arrayOfMap = Seq(create_map(1 -> "a", 2 -> "b")) val map = MapFromArrays( Literal.create(arrayOfMap, ArrayType(MapType(IntegerType, StringType))), - Literal.create(Seq(1), ArrayType(IntegerType)) - ) + Literal.create(Seq(1), ArrayType(IntegerType))) map.checkInputDataTypes() match { case TypeCheckResult.TypeCheckSuccess => fail("should not allow map as map key") case TypeCheckResult.TypeCheckFailure(msg) => @@ -356,6 +367,11 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { val m5 = Map("a" -> null) checkEvaluation(new StringToMap(s5), m5) + // Duplicated map keys will be removed w.r.t. the last wins policy. + checkEvaluation( + new StringToMap(Literal("a:1,b:2,a:3")), + create_map("a" -> "3", "b" -> "2")) + // arguments checking assert(new StringToMap(Literal("a:1,b:2,c:3")).checkInputDataTypes().isSuccess) assert(new StringToMap(Literal(null)).checkInputDataTypes().isFailure) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala index 66bf18af9579..03fb75e330c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala @@ -330,8 +330,8 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation( transformKeys(transformKeys(ai0, plusOne), plusValue), create_map(3 -> 1, 5 -> 2, 7 -> 3, 9 -> 4)) - checkEvaluation(transformKeys(ai0, modKey), - ArrayBasedMapData(Array(1, 2, 0, 1), Array(1, 2, 3, 4))) + // Duplicated map keys will be removed w.r.t. the last wins policy. 
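    // modKey maps ai0's keys to 1, 2, 0, 1 (with values 1, 2, 3, 4), so two entries collide
    // on key 1 and the later value 4 wins for that key.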
+ checkEvaluation(transformKeys(ai0, modKey), create_map(1 -> 4, 2 -> 2, 0 -> 3)) checkEvaluation(transformKeys(ai1, plusOne), Map.empty[Int, Int]) checkEvaluation(transformKeys(ai1, plusOne), Map.empty[Int, Int]) checkEvaluation( @@ -467,16 +467,13 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper .bind(validateBinding) } - val mii0 = Literal.create(Map(1 -> 10, 2 -> 20, 3 -> 30), + val mii0 = Literal.create(create_map(1 -> 10, 2 -> 20, 3 -> 30), MapType(IntegerType, IntegerType, valueContainsNull = false)) - val mii1 = Literal.create(Map(1 -> -1, 2 -> -2, 4 -> -4), + val mii1 = Literal.create(create_map(1 -> -1, 2 -> -2, 4 -> -4), MapType(IntegerType, IntegerType, valueContainsNull = false)) - val mii2 = Literal.create(Map(1 -> null, 2 -> -2, 3 -> null), + val mii2 = Literal.create(create_map(1 -> null, 2 -> -2, 3 -> null), MapType(IntegerType, IntegerType, valueContainsNull = true)) val mii3 = Literal.create(Map(), MapType(IntegerType, IntegerType, valueContainsNull = false)) - val mii4 = MapFromArrays( - Literal.create(Seq(2, 2), ArrayType(IntegerType, false)), - Literal.create(Seq(20, 200), ArrayType(IntegerType, false))) val miin = Literal.create(null, MapType(IntegerType, IntegerType, valueContainsNull = false)) val multiplyKeyWithValues: (Expression, Expression, Expression) => Expression = { @@ -492,12 +489,6 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation( map_zip_with(mii0, mii3, multiplyKeyWithValues), Map(1 -> null, 2 -> null, 3 -> null)) - checkEvaluation( - map_zip_with(mii0, mii4, multiplyKeyWithValues), - Map(1 -> null, 2 -> 800, 3 -> null)) - checkEvaluation( - map_zip_with(mii4, mii0, multiplyKeyWithValues), - Map(2 -> 800, 1 -> null, 3 -> null)) checkEvaluation( map_zip_with(mii0, miin, multiplyKeyWithValues), null) @@ -511,9 +502,6 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper val mss2 = Literal.create(Map("c" -> null, "b" -> "t", "a" -> null), MapType(StringType, StringType, valueContainsNull = true)) val mss3 = Literal.create(Map(), MapType(StringType, StringType, valueContainsNull = false)) - val mss4 = MapFromArrays( - Literal.create(Seq("a", "a"), ArrayType(StringType, false)), - Literal.create(Seq("a", "n"), ArrayType(StringType, false))) val mssn = Literal.create(null, MapType(StringType, StringType, valueContainsNull = false)) val concat: (Expression, Expression, Expression) => Expression = { @@ -529,12 +517,6 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation( map_zip_with(mss0, mss3, concat), Map("a" -> null, "b" -> null, "d" -> null)) - checkEvaluation( - map_zip_with(mss0, mss4, concat), - Map("a" -> "axa", "b" -> null, "d" -> null)) - checkEvaluation( - map_zip_with(mss4, mss0, concat), - Map("a" -> "aax", "b" -> null, "d" -> null)) checkEvaluation( map_zip_with(mss0, mssn, concat), null) @@ -550,9 +532,6 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper val mbb2 = Literal.create(Map(b(1, 3) -> null, b(1, 2) -> b(2), b(2, 1) -> null), MapType(BinaryType, BinaryType, valueContainsNull = true)) val mbb3 = Literal.create(Map(), MapType(BinaryType, BinaryType, valueContainsNull = false)) - val mbb4 = MapFromArrays( - Literal.create(Seq(b(2, 1), b(2, 1)), ArrayType(BinaryType, false)), - Literal.create(Seq(b(1), b(9)), ArrayType(BinaryType, false))) val mbbn = Literal.create(null, MapType(BinaryType, BinaryType, valueContainsNull = false)) checkEvaluation( @@ 
-564,12 +543,6 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation( map_zip_with(mbb0, mbb3, concat), Map(b(1, 2) -> null, b(2, 1) -> null, b(1, 3) -> null)) - checkEvaluation( - map_zip_with(mbb0, mbb4, concat), - Map(b(1, 2) -> null, b(2, 1) -> b(2, 1, 5, 1), b(1, 3) -> null)) - checkEvaluation( - map_zip_with(mbb4, mbb0, concat), - Map(b(2, 1) -> b(2, 1, 1, 5), b(1, 2) -> null, b(1, 3) -> null)) checkEvaluation( map_zip_with(mbb0, mbbn, concat), null) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilderSuite.scala new file mode 100644 index 000000000000..8509bce17712 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilderSuite.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{UnsafeArrayData, UnsafeRow} +import org.apache.spark.sql.types.{ArrayType, BinaryType, IntegerType, StructType} +import org.apache.spark.unsafe.Platform + +class ArrayBasedMapBuilderSuite extends SparkFunSuite { + + test("basic") { + val builder = new ArrayBasedMapBuilder(IntegerType, IntegerType) + builder.put(1, 1) + builder.put(InternalRow(2, 2)) + builder.putAll(new GenericArrayData(Seq(3)), new GenericArrayData(Seq(3))) + val map = builder.build() + assert(map.numElements() == 3) + assert(ArrayBasedMapData.toScalaMap(map) == Map(1 -> 1, 2 -> 2, 3 -> 3)) + } + + test("fail with null key") { + val builder = new ArrayBasedMapBuilder(IntegerType, IntegerType) + builder.put(1, null) // null value is OK + val e = intercept[RuntimeException](builder.put(null, 1)) + assert(e.getMessage.contains("Cannot use null as map key")) + } + + test("remove duplicated keys with last wins policy") { + val builder = new ArrayBasedMapBuilder(IntegerType, IntegerType) + builder.put(1, 1) + builder.put(2, 2) + builder.put(1, 2) + val map = builder.build() + assert(map.numElements() == 2) + assert(ArrayBasedMapData.toScalaMap(map) == Map(1 -> 2, 2 -> 2)) + } + + test("binary type key") { + val builder = new ArrayBasedMapBuilder(BinaryType, IntegerType) + builder.put(Array(1.toByte), 1) + builder.put(Array(2.toByte), 2) + builder.put(Array(1.toByte), 3) + val map = builder.build() + assert(map.numElements() == 2) + val entries = ArrayBasedMapData.toScalaMap(map).iterator.toSeq + assert(entries(0)._1.asInstanceOf[Array[Byte]].toSeq == Seq(1)) + assert(entries(0)._2 == 3) + assert(entries(1)._1.asInstanceOf[Array[Byte]].toSeq == Seq(2)) + 
assert(entries(1)._2 == 2) + } + + test("struct type key") { + val builder = new ArrayBasedMapBuilder(new StructType().add("i", "int"), IntegerType) + builder.put(InternalRow(1), 1) + builder.put(InternalRow(2), 2) + val unsafeRow = { + val row = new UnsafeRow(1) + val bytes = new Array[Byte](16) + row.pointTo(bytes, 16) + row.setInt(0, 1) + row + } + builder.put(unsafeRow, 3) + val map = builder.build() + assert(map.numElements() == 2) + assert(ArrayBasedMapData.toScalaMap(map) == Map(InternalRow(1) -> 3, InternalRow(2) -> 2)) + } + + test("array type key") { + val builder = new ArrayBasedMapBuilder(ArrayType(IntegerType), IntegerType) + builder.put(new GenericArrayData(Seq(1, 1)), 1) + builder.put(new GenericArrayData(Seq(2, 2)), 2) + val unsafeArray = { + val array = new UnsafeArrayData() + val bytes = new Array[Byte](24) + Platform.putLong(bytes, Platform.BYTE_ARRAY_OFFSET, 2) + array.pointTo(bytes, Platform.BYTE_ARRAY_OFFSET, 24) + array.setInt(0, 1) + array.setInt(1, 1) + array + } + builder.put(unsafeArray, 3) + val map = builder.build() + assert(map.numElements() == 2) + assert(ArrayBasedMapData.toScalaMap(map) == + Map(new GenericArrayData(Seq(1, 1)) -> 3, new GenericArrayData(Seq(2, 2)) -> 2)) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala index 4ecc54bd2fd9..ee16b3ab07f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala @@ -179,6 +179,8 @@ class OrcDeserializer( i += 1 } + // The ORC map will never have null or duplicated map keys, it's safe to create a + // ArrayBasedMapData directly here. updater.set(ordinal, new ArrayBasedMapData(keyArray, valueArray)) case udt: UserDefinedType[_] => newWriter(udt.sqlType, updater) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index 119972594184..004a96d13413 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -558,8 +558,12 @@ private[parquet] class ParquetRowConverter( override def getConverter(fieldIndex: Int): Converter = keyValueConverter - override def end(): Unit = + override def end(): Unit = { + // The parquet map may contains null or duplicated map keys. When it happens, the behavior is + // undefined. + // TODO (SPARK-26174): disallow it with a config. updater.set(ArrayBasedMapData(currentKeys.toArray, currentValues.toArray)) + } // NOTE: We can't reuse the mutable Map here and must instantiate a new `Map` for the next // value. 
`Row.copy()` only copies row cells, it doesn't do deep copy to objects stored in row diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 666ba35d7a8f..e6d1a038a591 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -89,13 +89,13 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { val msg1 = intercept[Exception] { df5.select(map_from_arrays($"k", $"v")).collect }.getMessage - assert(msg1.contains("Cannot use null as map key!")) + assert(msg1.contains("Cannot use null as map key")) val df6 = Seq((Seq(1, 2), Seq("a"))).toDF("k", "v") val msg2 = intercept[Exception] { df6.select(map_from_arrays($"k", $"v")).collect }.getMessage - assert(msg2.contains("The given two arrays should have the same length")) + assert(msg2.contains("The key array and value array of MapData must have the same length")) } test("struct with column name") { @@ -2588,7 +2588,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { val ex3 = intercept[Exception] { dfExample1.selectExpr("transform_keys(i, (k, v) -> v)").show() } - assert(ex3.getMessage.contains("Cannot use null as map key!")) + assert(ex3.getMessage.contains("Cannot use null as map key")) val ex4 = intercept[AnalysisException] { dfExample2.selectExpr("transform_keys(j, (k, v) -> k + 1)") From 8bfea86b1c8a65ce73711af02d9e4140659a926d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 29 Nov 2018 01:54:06 +0000 Subject: [PATCH 0012/1072] [SPARK-26133][ML] Remove deprecated OneHotEncoder and rename OneHotEncoderEstimator to OneHotEncoder ## What changes were proposed in this pull request? We have deprecated `OneHotEncoder` at Spark 2.3.0 and introduced `OneHotEncoderEstimator`. At 3.0.0, we remove deprecated `OneHotEncoder` and rename `OneHotEncoderEstimator` to `OneHotEncoder`. TODO: According to ML migration guide, we need to keep `OneHotEncoderEstimator` as an alias after renaming. This is not done at this patch in order to facilitate review. ## How was this patch tested? Existing tests. Closes #23100 from viirya/remove_one_hot_encoder. 
Authored-by: Liang-Chi Hsieh Signed-off-by: DB Tsai --- docs/ml-features.md | 24 +- docs/ml-guide.md | 6 + ...ple.java => JavaOneHotEncoderExample.java} | 8 +- ...r_example.py => onehot_encoder_example.py} | 8 +- ...ample.scala => OneHotEncoderExample.scala} | 8 +- .../spark/ml/feature/OneHotEncoder.scala | 530 +++++++++++++++--- .../ml/feature/OneHotEncoderEstimator.scala | 528 ----------------- .../apache/spark/ml/feature/RFormula.scala | 4 +- .../feature/OneHotEncoderEstimatorSuite.scala | 422 -------------- .../spark/ml/feature/OneHotEncoderSuite.scala | 411 +++++++++++--- project/MimaExcludes.scala | 12 + python/pyspark/ml/feature.py | 102 +--- 12 files changed, 841 insertions(+), 1222 deletions(-) rename examples/src/main/java/org/apache/spark/examples/ml/{JavaOneHotEncoderEstimatorExample.java => JavaOneHotEncoderExample.java} (91%) rename examples/src/main/python/ml/{onehot_encoder_estimator_example.py => onehot_encoder_example.py} (83%) rename examples/src/main/scala/org/apache/spark/examples/ml/{OneHotEncoderEstimatorExample.scala => OneHotEncoderExample.scala} (89%) delete mode 100644 mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoderEstimator.scala delete mode 100644 mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderEstimatorSuite.scala diff --git a/docs/ml-features.md b/docs/ml-features.md index 882b895a9d15..83a211ce02e6 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -779,43 +779,37 @@ for more details on the API. -## OneHotEncoder (Deprecated since 2.3.0) - -Because this existing `OneHotEncoder` is a stateless transformer, it is not usable on new data where the number of categories may differ from the training data. In order to fix this, a new `OneHotEncoderEstimator` was created that produces an `OneHotEncoderModel` when fitting. For more detail, please see [SPARK-13030](https://issues.apache.org/jira/browse/SPARK-13030). - -`OneHotEncoder` has been deprecated in 2.3.0 and will be removed in 3.0.0. Please use [OneHotEncoderEstimator](ml-features.html#onehotencoderestimator) instead. - -## OneHotEncoderEstimator +## OneHotEncoder [One-hot encoding](http://en.wikipedia.org/wiki/One-hot) maps a categorical feature, represented as a label index, to a binary vector with at most a single one-value indicating the presence of a specific feature value from among the set of all feature values. This encoding allows algorithms which expect continuous features, such as Logistic Regression, to use categorical features. For string type input data, it is common to encode categorical features using [StringIndexer](ml-features.html#stringindexer) first. -`OneHotEncoderEstimator` can transform multiple columns, returning an one-hot-encoded output vector column for each input column. It is common to merge these vectors into a single feature vector using [VectorAssembler](ml-features.html#vectorassembler). +`OneHotEncoder` can transform multiple columns, returning an one-hot-encoded output vector column for each input column. It is common to merge these vectors into a single feature vector using [VectorAssembler](ml-features.html#vectorassembler). -`OneHotEncoderEstimator` supports the `handleInvalid` parameter to choose how to handle invalid input during transforming data. Available options include 'keep' (any invalid inputs are assigned to an extra categorical index) and 'error' (throw an error). +`OneHotEncoder` supports the `handleInvalid` parameter to choose how to handle invalid input during transforming data. 
Available options include 'keep' (any invalid inputs are assigned to an extra categorical index) and 'error' (throw an error). **Examples**
-Refer to the [OneHotEncoderEstimator Scala docs](api/scala/index.html#org.apache.spark.ml.feature.OneHotEncoderEstimator) for more details on the API. +Refer to the [OneHotEncoder Scala docs](api/scala/index.html#org.apache.spark.ml.feature.OneHotEncoder) for more details on the API. -{% include_example scala/org/apache/spark/examples/ml/OneHotEncoderEstimatorExample.scala %} +{% include_example scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala %}
-Refer to the [OneHotEncoderEstimator Java docs](api/java/org/apache/spark/ml/feature/OneHotEncoderEstimator.html) +Refer to the [OneHotEncoder Java docs](api/java/org/apache/spark/ml/feature/OneHotEncoder.html) for more details on the API. -{% include_example java/org/apache/spark/examples/ml/JavaOneHotEncoderEstimatorExample.java %} +{% include_example java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java %}
-Refer to the [OneHotEncoderEstimator Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.OneHotEncoderEstimator) for more details on the API. +Refer to the [OneHotEncoder Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.OneHotEncoder) for more details on the API. -{% include_example python/ml/onehot_encoder_estimator_example.py %} +{% include_example python/ml/onehot_encoder_example.py %}
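A minimal sketch of the renamed multi-column API, modeled on the bundled OneHotEncoderExample and assuming a SparkSession named `spark` is already in scope (the data and column names are illustrative):

import org.apache.spark.ml.feature.OneHotEncoder

// Categorical features are usually indexed with StringIndexer first; these doubles
// stand in for already-indexed category values.
val df = spark.createDataFrame(Seq(
  (0.0, 1.0),
  (1.0, 0.0),
  (2.0, 1.0),
  (0.0, 2.0)
)).toDF("categoryIndex1", "categoryIndex2")

// The estimator learns each column's category count during fit(); the resulting
// OneHotEncoderModel then emits one sparse vector column per input column.
val encoder = new OneHotEncoder()
  .setInputCols(Array("categoryIndex1", "categoryIndex2"))
  .setOutputCols(Array("categoryVec1", "categoryVec2"))
val model = encoder.fit(df)
model.transform(df).show()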
diff --git a/docs/ml-guide.md b/docs/ml-guide.md index aea07be34cb8..57d4e1fe9d33 100644 --- a/docs/ml-guide.md +++ b/docs/ml-guide.md @@ -104,6 +104,12 @@ MLlib is under active development. The APIs marked `Experimental`/`DeveloperApi` may change in future releases, and the migration guide below will explain all changes between releases. +## From 2.4 to 3.0 + +### Breaking changes + +* `OneHotEncoder` which is deprecated in 2.3, is removed in 3.0 and `OneHotEncoderEstimator` is now renamed to `OneHotEncoder`. + ## From 2.2 to 2.3 ### Breaking changes diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderEstimatorExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java similarity index 91% rename from examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderEstimatorExample.java rename to examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java index 6f93cff94b72..4b49bebf7ccf 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderEstimatorExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java @@ -23,7 +23,7 @@ import java.util.Arrays; import java.util.List; -import org.apache.spark.ml.feature.OneHotEncoderEstimator; +import org.apache.spark.ml.feature.OneHotEncoder; import org.apache.spark.ml.feature.OneHotEncoderModel; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -34,11 +34,11 @@ import org.apache.spark.sql.types.StructType; // $example off$ -public class JavaOneHotEncoderEstimatorExample { +public class JavaOneHotEncoderExample { public static void main(String[] args) { SparkSession spark = SparkSession .builder() - .appName("JavaOneHotEncoderEstimatorExample") + .appName("JavaOneHotEncoderExample") .getOrCreate(); // Note: categorical features are usually first encoded with StringIndexer @@ -59,7 +59,7 @@ public static void main(String[] args) { Dataset df = spark.createDataFrame(data, schema); - OneHotEncoderEstimator encoder = new OneHotEncoderEstimator() + OneHotEncoder encoder = new OneHotEncoder() .setInputCols(new String[] {"categoryIndex1", "categoryIndex2"}) .setOutputCols(new String[] {"categoryVec1", "categoryVec2"}); diff --git a/examples/src/main/python/ml/onehot_encoder_estimator_example.py b/examples/src/main/python/ml/onehot_encoder_example.py similarity index 83% rename from examples/src/main/python/ml/onehot_encoder_estimator_example.py rename to examples/src/main/python/ml/onehot_encoder_example.py index 2723e681cea7..73775b79e36c 100644 --- a/examples/src/main/python/ml/onehot_encoder_estimator_example.py +++ b/examples/src/main/python/ml/onehot_encoder_example.py @@ -18,14 +18,14 @@ from __future__ import print_function # $example on$ -from pyspark.ml.feature import OneHotEncoderEstimator +from pyspark.ml.feature import OneHotEncoder # $example off$ from pyspark.sql import SparkSession if __name__ == "__main__": spark = SparkSession\ .builder\ - .appName("OneHotEncoderEstimatorExample")\ + .appName("OneHotEncoderExample")\ .getOrCreate() # Note: categorical features are usually first encoded with StringIndexer @@ -39,8 +39,8 @@ (2.0, 0.0) ], ["categoryIndex1", "categoryIndex2"]) - encoder = OneHotEncoderEstimator(inputCols=["categoryIndex1", "categoryIndex2"], - outputCols=["categoryVec1", "categoryVec2"]) + encoder = OneHotEncoder(inputCols=["categoryIndex1", "categoryIndex2"], + outputCols=["categoryVec1", "categoryVec2"]) model = encoder.fit(df) encoded = 
model.transform(df) encoded.show() diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderEstimatorExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala similarity index 89% rename from examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderEstimatorExample.scala rename to examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala index 45d816808ed8..742f3cdeea35 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderEstimatorExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala @@ -19,15 +19,15 @@ package org.apache.spark.examples.ml // $example on$ -import org.apache.spark.ml.feature.OneHotEncoderEstimator +import org.apache.spark.ml.feature.OneHotEncoder // $example off$ import org.apache.spark.sql.SparkSession -object OneHotEncoderEstimatorExample { +object OneHotEncoderExample { def main(args: Array[String]): Unit = { val spark = SparkSession .builder - .appName("OneHotEncoderEstimatorExample") + .appName("OneHotEncoderExample") .getOrCreate() // Note: categorical features are usually first encoded with StringIndexer @@ -41,7 +41,7 @@ object OneHotEncoderEstimatorExample { (2.0, 0.0) )).toDF("categoryIndex1", "categoryIndex2") - val encoder = new OneHotEncoderEstimator() + val encoder = new OneHotEncoder() .setInputCols(Array("categoryIndex1", "categoryIndex2")) .setOutputCols(Array("categoryVec1", "categoryVec2")) val model = encoder.fit(df) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 27e4869a020b..ec9792cbbda8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -17,126 +17,512 @@ package org.apache.spark.ml.feature +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException import org.apache.spark.annotation.Since -import org.apache.spark.ml.Transformer +import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.Vectors import org.apache.spark.ml.param._ -import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} +import org.apache.spark.ml.param.shared.{HasHandleInvalid, HasInputCols, HasOutputCols} import org.apache.spark.ml.util._ import org.apache.spark.sql.{DataFrame, Dataset} -import org.apache.spark.sql.functions.{col, udf} -import org.apache.spark.sql.types.{DoubleType, NumericType, StructType} +import org.apache.spark.sql.expressions.UserDefinedFunction +import org.apache.spark.sql.functions.{col, lit, udf} +import org.apache.spark.sql.types.{DoubleType, StructField, StructType} + +/** Private trait for params and common methods for OneHotEncoder and OneHotEncoderModel */ +private[ml] trait OneHotEncoderBase extends Params with HasHandleInvalid + with HasInputCols with HasOutputCols { + + /** + * Param for how to handle invalid data during transform(). + * Options are 'keep' (invalid data presented as an extra categorical feature) or + * 'error' (throw an error). + * Note that this Param is only used during transform; during fitting, invalid data + * will result in an error. + * Default: "error" + * @group param + */ + @Since("2.3.0") + override val handleInvalid: Param[String] = new Param[String](this, "handleInvalid", + "How to handle invalid data during transform(). 
" + + "Options are 'keep' (invalid data presented as an extra categorical feature) " + + "or error (throw an error). Note that this Param is only used during transform; " + + "during fitting, invalid data will result in an error.", + ParamValidators.inArray(OneHotEncoder.supportedHandleInvalids)) + + setDefault(handleInvalid, OneHotEncoder.ERROR_INVALID) + + /** + * Whether to drop the last category in the encoded vector (default: true) + * @group param + */ + @Since("2.3.0") + final val dropLast: BooleanParam = + new BooleanParam(this, "dropLast", "whether to drop the last category") + setDefault(dropLast -> true) + + /** @group getParam */ + @Since("2.3.0") + def getDropLast: Boolean = $(dropLast) + + protected def validateAndTransformSchema( + schema: StructType, + dropLast: Boolean, + keepInvalid: Boolean): StructType = { + val inputColNames = $(inputCols) + val outputColNames = $(outputCols) + + require(inputColNames.length == outputColNames.length, + s"The number of input columns ${inputColNames.length} must be the same as the number of " + + s"output columns ${outputColNames.length}.") + + // Input columns must be NumericType. + inputColNames.foreach(SchemaUtils.checkNumericType(schema, _)) + + // Prepares output columns with proper attributes by examining input columns. + val inputFields = $(inputCols).map(schema(_)) + + val outputFields = inputFields.zip(outputColNames).map { case (inputField, outputColName) => + OneHotEncoderCommon.transformOutputColumnSchema( + inputField, outputColName, dropLast, keepInvalid) + } + outputFields.foldLeft(schema) { case (newSchema, outputField) => + SchemaUtils.appendColumn(newSchema, outputField) + } + } +} /** * A one-hot encoder that maps a column of category indices to a column of binary vectors, with * at most a single one-value per row that indicates the input category index. * For example with 5 categories, an input value of 2.0 would map to an output vector of * `[0.0, 0.0, 1.0, 0.0]`. - * The last category is not included by default (configurable via `OneHotEncoder!.dropLast` + * The last category is not included by default (configurable via `dropLast`), * because it makes the vector entries sum up to one, and hence linearly dependent. * So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`. * * @note This is different from scikit-learn's OneHotEncoder, which keeps all categories. * The output vectors are sparse. * + * When `handleInvalid` is configured to 'keep', an extra "category" indicating invalid values is + * added as last category. So when `dropLast` is true, invalid values are encoded as all-zeros + * vector. + * + * @note When encoding multi-column by using `inputCols` and `outputCols` params, input/output cols + * come in pairs, specified by the order in the arrays, and each pair is treated independently. + * * @see `StringIndexer` for converting categorical values into category indices - * @deprecated `OneHotEncoderEstimator` will be renamed `OneHotEncoder` and this `OneHotEncoder` - * will be removed in 3.0.0. 
*/ -@Since("1.4.0") -@deprecated("`OneHotEncoderEstimator` will be renamed `OneHotEncoder` and this `OneHotEncoder`" + - " will be removed in 3.0.0.", "2.3.0") -class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer - with HasInputCol with HasOutputCol with DefaultParamsWritable { +@Since("3.0.0") +class OneHotEncoder @Since("3.0.0") (@Since("3.0.0") override val uid: String) + extends Estimator[OneHotEncoderModel] with OneHotEncoderBase with DefaultParamsWritable { - @Since("1.4.0") - def this() = this(Identifiable.randomUID("oneHot")) + @Since("3.0.0") + def this() = this(Identifiable.randomUID("oneHotEncoder")) - /** - * Whether to drop the last category in the encoded vector (default: true) - * @group param - */ - @Since("1.4.0") - final val dropLast: BooleanParam = - new BooleanParam(this, "dropLast", "whether to drop the last category") - setDefault(dropLast -> true) + /** @group setParam */ + @Since("3.0.0") + def setInputCols(values: Array[String]): this.type = set(inputCols, values) - /** @group getParam */ - @Since("2.0.0") - def getDropLast: Boolean = $(dropLast) + /** @group setParam */ + @Since("3.0.0") + def setOutputCols(values: Array[String]): this.type = set(outputCols, values) /** @group setParam */ - @Since("1.4.0") + @Since("3.0.0") def setDropLast(value: Boolean): this.type = set(dropLast, value) /** @group setParam */ - @Since("1.4.0") - def setInputCol(value: String): this.type = set(inputCol, value) + @Since("3.0.0") + def setHandleInvalid(value: String): this.type = set(handleInvalid, value) + + @Since("3.0.0") + override def transformSchema(schema: StructType): StructType = { + val keepInvalid = $(handleInvalid) == OneHotEncoder.KEEP_INVALID + validateAndTransformSchema(schema, dropLast = $(dropLast), + keepInvalid = keepInvalid) + } + + @Since("3.0.0") + override def fit(dataset: Dataset[_]): OneHotEncoderModel = { + transformSchema(dataset.schema) + + // Compute the plain number of categories without `handleInvalid` and + // `dropLast` taken into account. + val transformedSchema = validateAndTransformSchema(dataset.schema, dropLast = false, + keepInvalid = false) + val categorySizes = new Array[Int]($(outputCols).length) + + val columnToScanIndices = $(outputCols).zipWithIndex.flatMap { case (outputColName, idx) => + val numOfAttrs = AttributeGroup.fromStructField( + transformedSchema(outputColName)).size + if (numOfAttrs < 0) { + Some(idx) + } else { + categorySizes(idx) = numOfAttrs + None + } + } + + // Some input columns don't have attributes or their attributes don't have necessary info. + // We need to scan the data to get the number of values for each column. + if (columnToScanIndices.length > 0) { + val inputColNames = columnToScanIndices.map($(inputCols)(_)) + val outputColNames = columnToScanIndices.map($(outputCols)(_)) + + // When fitting data, we want the plain number of categories without `handleInvalid` and + // `dropLast` taken into account. 
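      // (The fitted model stores these raw sizes; getConfigedCategorySizes applies the
      // `dropLast`/`handleInvalid` adjustments later, at transform time.)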
+ val attrGroups = OneHotEncoderCommon.getOutputAttrGroupFromData( + dataset, inputColNames, outputColNames, dropLast = false) + attrGroups.zip(columnToScanIndices).foreach { case (attrGroup, idx) => + categorySizes(idx) = attrGroup.size + } + } + + val model = new OneHotEncoderModel(uid, categorySizes).setParent(this) + copyValues(model) + } + + @Since("3.0.0") + override def copy(extra: ParamMap): OneHotEncoder = defaultCopy(extra) +} + +@Since("3.0.0") +object OneHotEncoder extends DefaultParamsReadable[OneHotEncoder] { + + private[feature] val KEEP_INVALID: String = "keep" + private[feature] val ERROR_INVALID: String = "error" + private[feature] val supportedHandleInvalids: Array[String] = Array(KEEP_INVALID, ERROR_INVALID) + + @Since("3.0.0") + override def load(path: String): OneHotEncoder = super.load(path) +} + +/** + * @param categorySizes Original number of categories for each feature being encoded. + * The array contains one value for each input column, in order. + */ +@Since("3.0.0") +class OneHotEncoderModel private[ml] ( + @Since("3.0.0") override val uid: String, + @Since("3.0.0") val categorySizes: Array[Int]) + extends Model[OneHotEncoderModel] with OneHotEncoderBase with MLWritable { + + import OneHotEncoderModel._ + + // Returns the category size for each index with `dropLast` and `handleInvalid` + // taken into account. + private def getConfigedCategorySizes: Array[Int] = { + val dropLast = getDropLast + val keepInvalid = getHandleInvalid == OneHotEncoder.KEEP_INVALID + + if (!dropLast && keepInvalid) { + // When `handleInvalid` is "keep", an extra category is added as last category + // for invalid data. + categorySizes.map(_ + 1) + } else if (dropLast && !keepInvalid) { + // When `dropLast` is true, the last category is removed. + categorySizes.map(_ - 1) + } else { + // When `dropLast` is true and `handleInvalid` is "keep", the extra category for invalid + // data is removed. Thus, it is the same as the plain number of categories. + categorySizes + } + } + + private def encoder: UserDefinedFunction = { + val keepInvalid = getHandleInvalid == OneHotEncoder.KEEP_INVALID + val configedSizes = getConfigedCategorySizes + val localCategorySizes = categorySizes + + // The udf performed on input data. The first parameter is the input value. The second + // parameter is the index in inputCols of the column being encoded. + udf { (label: Double, colIdx: Int) => + val origCategorySize = localCategorySizes(colIdx) + // idx: index in vector of the single 1-valued element + val idx = if (label >= 0 && label < origCategorySize) { + label + } else { + if (keepInvalid) { + origCategorySize + } else { + if (label < 0) { + throw new SparkException(s"Negative value: $label. Input can't be negative. " + + s"To handle invalid values, set Param handleInvalid to " + + s"${OneHotEncoder.KEEP_INVALID}") + } else { + throw new SparkException(s"Unseen value: $label. 
To handle unseen values, " + + s"set Param handleInvalid to ${OneHotEncoder.KEEP_INVALID}.") + } + } + } + + val size = configedSizes(colIdx) + if (idx < size) { + Vectors.sparse(size, Array(idx.toInt), Array(1.0)) + } else { + Vectors.sparse(size, Array.empty[Int], Array.empty[Double]) + } + } + } /** @group setParam */ - @Since("1.4.0") - def setOutputCol(value: String): this.type = set(outputCol, value) + @Since("3.0.0") + def setInputCols(values: Array[String]): this.type = set(inputCols, values) - @Since("1.4.0") + /** @group setParam */ + @Since("3.0.0") + def setOutputCols(values: Array[String]): this.type = set(outputCols, values) + + /** @group setParam */ + @Since("3.0.0") + def setDropLast(value: Boolean): this.type = set(dropLast, value) + + /** @group setParam */ + @Since("3.0.0") + def setHandleInvalid(value: String): this.type = set(handleInvalid, value) + + @Since("3.0.0") override def transformSchema(schema: StructType): StructType = { - val inputColName = $(inputCol) - val outputColName = $(outputCol) - val inputFields = schema.fields + val inputColNames = $(inputCols) + + require(inputColNames.length == categorySizes.length, + s"The number of input columns ${inputColNames.length} must be the same as the number of " + + s"features ${categorySizes.length} during fitting.") + + val keepInvalid = $(handleInvalid) == OneHotEncoder.KEEP_INVALID + val transformedSchema = validateAndTransformSchema(schema, dropLast = $(dropLast), + keepInvalid = keepInvalid) + verifyNumOfValues(transformedSchema) + } - require(schema(inputColName).dataType.isInstanceOf[NumericType], - s"Input column must be of type ${NumericType.simpleString} but got " + - schema(inputColName).dataType.catalogString) - require(!inputFields.exists(_.name == outputColName), - s"Output column $outputColName already exists.") + /** + * If the metadata of input columns also specifies the number of categories, we need to + * compare with expected category number with `handleInvalid` and `dropLast` taken into + * account. Mismatched numbers will cause exception. + */ + private def verifyNumOfValues(schema: StructType): StructType = { + val configedSizes = getConfigedCategorySizes + $(outputCols).zipWithIndex.foreach { case (outputColName, idx) => + val inputColName = $(inputCols)(idx) + val attrGroup = AttributeGroup.fromStructField(schema(outputColName)) - val outputField = OneHotEncoderCommon.transformOutputColumnSchema( - schema(inputColName), outputColName, $(dropLast)) - val outputFields = inputFields :+ outputField - StructType(outputFields) + // If the input metadata specifies number of category for output column, + // comparing with expected category number with `handleInvalid` and + // `dropLast` taken into account. 
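      // For example, a column fitted with 3 categories under dropLast = true and
      // handleInvalid = "error" is expected to carry an attribute group of size 2 here.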
+ if (attrGroup.attributes.nonEmpty) { + val numCategories = configedSizes(idx) + require(attrGroup.size == numCategories, "OneHotEncoderModel expected " + + s"$numCategories categorical values for input column $inputColName, " + + s"but the input column had metadata specifying ${attrGroup.size} values.") + } + } + schema } - @Since("2.0.0") + @Since("3.0.0") override def transform(dataset: Dataset[_]): DataFrame = { - // schema transformation - val inputColName = $(inputCol) - val outputColName = $(outputCol) + val transformedSchema = transformSchema(dataset.schema, logging = true) + val keepInvalid = $(handleInvalid) == OneHotEncoder.KEEP_INVALID - val outputAttrGroupFromSchema = AttributeGroup.fromStructField( - transformSchema(dataset.schema)(outputColName)) + val encodedColumns = $(inputCols).indices.map { idx => + val inputColName = $(inputCols)(idx) + val outputColName = $(outputCols)(idx) - val outputAttrGroup = if (outputAttrGroupFromSchema.size < 0) { - OneHotEncoderCommon.getOutputAttrGroupFromData( - dataset, Seq(inputColName), Seq(outputColName), $(dropLast))(0) - } else { - outputAttrGroupFromSchema + val outputAttrGroupFromSchema = + AttributeGroup.fromStructField(transformedSchema(outputColName)) + + val metadata = if (outputAttrGroupFromSchema.size < 0) { + OneHotEncoderCommon.createAttrGroupForAttrNames(outputColName, + categorySizes(idx), $(dropLast), keepInvalid).toMetadata() + } else { + outputAttrGroupFromSchema.toMetadata() + } + + encoder(col(inputColName).cast(DoubleType), lit(idx)) + .as(outputColName, metadata) + } + dataset.withColumns($(outputCols), encodedColumns) + } + + @Since("3.0.0") + override def copy(extra: ParamMap): OneHotEncoderModel = { + val copied = new OneHotEncoderModel(uid, categorySizes) + copyValues(copied, extra).setParent(parent) + } + + @Since("3.0.0") + override def write: MLWriter = new OneHotEncoderModelWriter(this) +} + +@Since("3.0.0") +object OneHotEncoderModel extends MLReadable[OneHotEncoderModel] { + + private[OneHotEncoderModel] + class OneHotEncoderModelWriter(instance: OneHotEncoderModel) extends MLWriter { + + private case class Data(categorySizes: Array[Int]) + + override protected def saveImpl(path: String): Unit = { + DefaultParamsWriter.saveMetadata(instance, path, sc) + val data = Data(instance.categorySizes) + val dataPath = new Path(path, "data").toString + sparkSession.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath) + } + } + + private class OneHotEncoderModelReader extends MLReader[OneHotEncoderModel] { + + private val className = classOf[OneHotEncoderModel].getName + + override def load(path: String): OneHotEncoderModel = { + val metadata = DefaultParamsReader.loadMetadata(path, sc, className) + val dataPath = new Path(path, "data").toString + val data = sparkSession.read.parquet(dataPath) + .select("categorySizes") + .head() + val categorySizes = data.getAs[Seq[Int]](0).toArray + val model = new OneHotEncoderModel(metadata.uid, categorySizes) + metadata.getAndSetParams(model) + model + } + } + + @Since("3.0.0") + override def read: MLReader[OneHotEncoderModel] = new OneHotEncoderModelReader + + @Since("3.0.0") + override def load(path: String): OneHotEncoderModel = super.load(path) +} + +/** + * Provides some helper methods used by `OneHotEncoder`. 
+ */ +private[feature] object OneHotEncoderCommon { + + private def genOutputAttrNames(inputCol: StructField): Option[Array[String]] = { + val inputAttr = Attribute.fromStructField(inputCol) + inputAttr match { + case nominal: NominalAttribute => + if (nominal.values.isDefined) { + nominal.values + } else if (nominal.numValues.isDefined) { + nominal.numValues.map(n => Array.tabulate(n)(_.toString)) + } else { + None + } + case binary: BinaryAttribute => + if (binary.values.isDefined) { + binary.values + } else { + Some(Array.tabulate(2)(_.toString)) + } + case _: NumericAttribute => + throw new RuntimeException( + s"The input column ${inputCol.name} cannot be continuous-value.") + case _ => + None // optimistic about unknown attributes } + } - val metadata = outputAttrGroup.toMetadata() + /** Creates an `AttributeGroup` filled by the `BinaryAttribute` named as required. */ + private def genOutputAttrGroup( + outputAttrNames: Option[Array[String]], + outputColName: String): AttributeGroup = { + outputAttrNames.map { attrNames => + val attrs: Array[Attribute] = attrNames.map { name => + BinaryAttribute.defaultAttr.withName(name) + } + new AttributeGroup(outputColName, attrs) + }.getOrElse{ + new AttributeGroup(outputColName) + } + } - // data transformation - val size = outputAttrGroup.size - val oneValue = Array(1.0) - val emptyValues = Array.empty[Double] - val emptyIndices = Array.empty[Int] - val encode = udf { label: Double => - if (label < size) { - Vectors.sparse(size, Array(label.toInt), oneValue) + /** + * Prepares the `StructField` with proper metadata for `OneHotEncoder`'s output column. + */ + def transformOutputColumnSchema( + inputCol: StructField, + outputColName: String, + dropLast: Boolean, + keepInvalid: Boolean = false): StructField = { + val outputAttrNames = genOutputAttrNames(inputCol) + val filteredOutputAttrNames = outputAttrNames.map { names => + if (dropLast && !keepInvalid) { + require(names.length > 1, + s"The input column ${inputCol.name} should have at least two distinct values.") + names.dropRight(1) + } else if (!dropLast && keepInvalid) { + names ++ Seq("invalidValues") } else { - Vectors.sparse(size, emptyIndices, emptyValues) + names } } - dataset.select(col("*"), encode(col(inputColName).cast(DoubleType)).as(outputColName, metadata)) + genOutputAttrGroup(filteredOutputAttrNames, outputColName).toStructField() } - @Since("1.4.1") - override def copy(extra: ParamMap): OneHotEncoder = defaultCopy(extra) -} + /** + * This method is called when we want to generate `AttributeGroup` from actual data for + * one-hot encoder. + */ + def getOutputAttrGroupFromData( + dataset: Dataset[_], + inputColNames: Seq[String], + outputColNames: Seq[String], + dropLast: Boolean): Seq[AttributeGroup] = { + // The RDD approach has advantage of early-stop if any values are invalid. It seems that + // DataFrame ops don't have equivalent functions. 
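    // The treeAggregate below tracks the running maximum value per column, checking along
    // the way that every value is a non-negative integral index; each column's category
    // count is that maximum plus one.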
+ val columns = inputColNames.map { inputColName => + col(inputColName).cast(DoubleType) + } + val numOfColumns = columns.length -@Since("1.6.0") -object OneHotEncoder extends DefaultParamsReadable[OneHotEncoder] { + val numAttrsArray = dataset.select(columns: _*).rdd.map { row => + (0 until numOfColumns).map(idx => row.getDouble(idx)).toArray + }.treeAggregate(new Array[Double](numOfColumns))( + (maxValues, curValues) => { + (0 until numOfColumns).foreach { idx => + val x = curValues(idx) + assert(x <= Int.MaxValue, + s"OneHotEncoder only supports up to ${Int.MaxValue} indices, but got $x.") + assert(x >= 0.0 && x == x.toInt, + s"Values from column ${inputColNames(idx)} must be indices, but got $x.") + maxValues(idx) = math.max(maxValues(idx), x) + } + maxValues + }, + (m0, m1) => { + (0 until numOfColumns).foreach { idx => + m0(idx) = math.max(m0(idx), m1(idx)) + } + m0 + } + ).map(_.toInt + 1) - @Since("1.6.0") - override def load(path: String): OneHotEncoder = super.load(path) + outputColNames.zip(numAttrsArray).map { case (outputColName, numAttrs) => + createAttrGroupForAttrNames(outputColName, numAttrs, dropLast, keepInvalid = false) + } + } + + /** Creates an `AttributeGroup` with the required number of `BinaryAttribute`. */ + def createAttrGroupForAttrNames( + outputColName: String, + numAttrs: Int, + dropLast: Boolean, + keepInvalid: Boolean): AttributeGroup = { + val outputAttrNames = Array.tabulate(numAttrs)(_.toString) + val filtered = if (dropLast && !keepInvalid) { + outputAttrNames.dropRight(1) + } else if (!dropLast && keepInvalid) { + outputAttrNames ++ Seq("invalidValues") + } else { + outputAttrNames + } + genOutputAttrGroup(Some(filtered), outputColName) + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoderEstimator.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoderEstimator.scala deleted file mode 100644 index 4a44f3186538..000000000000 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoderEstimator.scala +++ /dev/null @@ -1,528 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.ml.feature - -import org.apache.hadoop.fs.Path - -import org.apache.spark.SparkException -import org.apache.spark.annotation.Since -import org.apache.spark.ml.{Estimator, Model} -import org.apache.spark.ml.attribute._ -import org.apache.spark.ml.linalg.Vectors -import org.apache.spark.ml.param._ -import org.apache.spark.ml.param.shared.{HasHandleInvalid, HasInputCols, HasOutputCols} -import org.apache.spark.ml.util._ -import org.apache.spark.sql.{DataFrame, Dataset} -import org.apache.spark.sql.expressions.UserDefinedFunction -import org.apache.spark.sql.functions.{col, lit, udf} -import org.apache.spark.sql.types.{DoubleType, StructField, StructType} - -/** Private trait for params and common methods for OneHotEncoderEstimator and OneHotEncoderModel */ -private[ml] trait OneHotEncoderBase extends Params with HasHandleInvalid - with HasInputCols with HasOutputCols { - - /** - * Param for how to handle invalid data during transform(). - * Options are 'keep' (invalid data presented as an extra categorical feature) or - * 'error' (throw an error). - * Note that this Param is only used during transform; during fitting, invalid data - * will result in an error. - * Default: "error" - * @group param - */ - @Since("2.3.0") - override val handleInvalid: Param[String] = new Param[String](this, "handleInvalid", - "How to handle invalid data during transform(). " + - "Options are 'keep' (invalid data presented as an extra categorical feature) " + - "or error (throw an error). Note that this Param is only used during transform; " + - "during fitting, invalid data will result in an error.", - ParamValidators.inArray(OneHotEncoderEstimator.supportedHandleInvalids)) - - setDefault(handleInvalid, OneHotEncoderEstimator.ERROR_INVALID) - - /** - * Whether to drop the last category in the encoded vector (default: true) - * @group param - */ - @Since("2.3.0") - final val dropLast: BooleanParam = - new BooleanParam(this, "dropLast", "whether to drop the last category") - setDefault(dropLast -> true) - - /** @group getParam */ - @Since("2.3.0") - def getDropLast: Boolean = $(dropLast) - - protected def validateAndTransformSchema( - schema: StructType, - dropLast: Boolean, - keepInvalid: Boolean): StructType = { - val inputColNames = $(inputCols) - val outputColNames = $(outputCols) - - require(inputColNames.length == outputColNames.length, - s"The number of input columns ${inputColNames.length} must be the same as the number of " + - s"output columns ${outputColNames.length}.") - - // Input columns must be NumericType. - inputColNames.foreach(SchemaUtils.checkNumericType(schema, _)) - - // Prepares output columns with proper attributes by examining input columns. - val inputFields = $(inputCols).map(schema(_)) - - val outputFields = inputFields.zip(outputColNames).map { case (inputField, outputColName) => - OneHotEncoderCommon.transformOutputColumnSchema( - inputField, outputColName, dropLast, keepInvalid) - } - outputFields.foldLeft(schema) { case (newSchema, outputField) => - SchemaUtils.appendColumn(newSchema, outputField) - } - } -} - -/** - * A one-hot encoder that maps a column of category indices to a column of binary vectors, with - * at most a single one-value per row that indicates the input category index. - * For example with 5 categories, an input value of 2.0 would map to an output vector of - * `[0.0, 0.0, 1.0, 0.0]`. 
- * The last category is not included by default (configurable via `dropLast`), - * because it makes the vector entries sum up to one, and hence linearly dependent. - * So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`. - * - * @note This is different from scikit-learn's OneHotEncoder, which keeps all categories. - * The output vectors are sparse. - * - * When `handleInvalid` is configured to 'keep', an extra "category" indicating invalid values is - * added as last category. So when `dropLast` is true, invalid values are encoded as all-zeros - * vector. - * - * @note When encoding multi-column by using `inputCols` and `outputCols` params, input/output cols - * come in pairs, specified by the order in the arrays, and each pair is treated independently. - * - * @see `StringIndexer` for converting categorical values into category indices - */ -@Since("2.3.0") -class OneHotEncoderEstimator @Since("2.3.0") (@Since("2.3.0") override val uid: String) - extends Estimator[OneHotEncoderModel] with OneHotEncoderBase with DefaultParamsWritable { - - @Since("2.3.0") - def this() = this(Identifiable.randomUID("oneHotEncoder")) - - /** @group setParam */ - @Since("2.3.0") - def setInputCols(values: Array[String]): this.type = set(inputCols, values) - - /** @group setParam */ - @Since("2.3.0") - def setOutputCols(values: Array[String]): this.type = set(outputCols, values) - - /** @group setParam */ - @Since("2.3.0") - def setDropLast(value: Boolean): this.type = set(dropLast, value) - - /** @group setParam */ - @Since("2.3.0") - def setHandleInvalid(value: String): this.type = set(handleInvalid, value) - - @Since("2.3.0") - override def transformSchema(schema: StructType): StructType = { - val keepInvalid = $(handleInvalid) == OneHotEncoderEstimator.KEEP_INVALID - validateAndTransformSchema(schema, dropLast = $(dropLast), - keepInvalid = keepInvalid) - } - - @Since("2.3.0") - override def fit(dataset: Dataset[_]): OneHotEncoderModel = { - transformSchema(dataset.schema) - - // Compute the plain number of categories without `handleInvalid` and - // `dropLast` taken into account. - val transformedSchema = validateAndTransformSchema(dataset.schema, dropLast = false, - keepInvalid = false) - val categorySizes = new Array[Int]($(outputCols).length) - - val columnToScanIndices = $(outputCols).zipWithIndex.flatMap { case (outputColName, idx) => - val numOfAttrs = AttributeGroup.fromStructField( - transformedSchema(outputColName)).size - if (numOfAttrs < 0) { - Some(idx) - } else { - categorySizes(idx) = numOfAttrs - None - } - } - - // Some input columns don't have attributes or their attributes don't have necessary info. - // We need to scan the data to get the number of values for each column. - if (columnToScanIndices.length > 0) { - val inputColNames = columnToScanIndices.map($(inputCols)(_)) - val outputColNames = columnToScanIndices.map($(outputCols)(_)) - - // When fitting data, we want the plain number of categories without `handleInvalid` and - // `dropLast` taken into account. 
- val attrGroups = OneHotEncoderCommon.getOutputAttrGroupFromData( - dataset, inputColNames, outputColNames, dropLast = false) - attrGroups.zip(columnToScanIndices).foreach { case (attrGroup, idx) => - categorySizes(idx) = attrGroup.size - } - } - - val model = new OneHotEncoderModel(uid, categorySizes).setParent(this) - copyValues(model) - } - - @Since("2.3.0") - override def copy(extra: ParamMap): OneHotEncoderEstimator = defaultCopy(extra) -} - -@Since("2.3.0") -object OneHotEncoderEstimator extends DefaultParamsReadable[OneHotEncoderEstimator] { - - private[feature] val KEEP_INVALID: String = "keep" - private[feature] val ERROR_INVALID: String = "error" - private[feature] val supportedHandleInvalids: Array[String] = Array(KEEP_INVALID, ERROR_INVALID) - - @Since("2.3.0") - override def load(path: String): OneHotEncoderEstimator = super.load(path) -} - -/** - * @param categorySizes Original number of categories for each feature being encoded. - * The array contains one value for each input column, in order. - */ -@Since("2.3.0") -class OneHotEncoderModel private[ml] ( - @Since("2.3.0") override val uid: String, - @Since("2.3.0") val categorySizes: Array[Int]) - extends Model[OneHotEncoderModel] with OneHotEncoderBase with MLWritable { - - import OneHotEncoderModel._ - - // Returns the category size for each index with `dropLast` and `handleInvalid` - // taken into account. - private def getConfigedCategorySizes: Array[Int] = { - val dropLast = getDropLast - val keepInvalid = getHandleInvalid == OneHotEncoderEstimator.KEEP_INVALID - - if (!dropLast && keepInvalid) { - // When `handleInvalid` is "keep", an extra category is added as last category - // for invalid data. - categorySizes.map(_ + 1) - } else if (dropLast && !keepInvalid) { - // When `dropLast` is true, the last category is removed. - categorySizes.map(_ - 1) - } else { - // When `dropLast` is true and `handleInvalid` is "keep", the extra category for invalid - // data is removed. Thus, it is the same as the plain number of categories. - categorySizes - } - } - - private def encoder: UserDefinedFunction = { - val keepInvalid = getHandleInvalid == OneHotEncoderEstimator.KEEP_INVALID - val configedSizes = getConfigedCategorySizes - val localCategorySizes = categorySizes - - // The udf performed on input data. The first parameter is the input value. The second - // parameter is the index in inputCols of the column being encoded. - udf { (label: Double, colIdx: Int) => - val origCategorySize = localCategorySizes(colIdx) - // idx: index in vector of the single 1-valued element - val idx = if (label >= 0 && label < origCategorySize) { - label - } else { - if (keepInvalid) { - origCategorySize - } else { - if (label < 0) { - throw new SparkException(s"Negative value: $label. Input can't be negative. " + - s"To handle invalid values, set Param handleInvalid to " + - s"${OneHotEncoderEstimator.KEEP_INVALID}") - } else { - throw new SparkException(s"Unseen value: $label. 
To handle unseen values, " + - s"set Param handleInvalid to ${OneHotEncoderEstimator.KEEP_INVALID}.") - } - } - } - - val size = configedSizes(colIdx) - if (idx < size) { - Vectors.sparse(size, Array(idx.toInt), Array(1.0)) - } else { - Vectors.sparse(size, Array.empty[Int], Array.empty[Double]) - } - } - } - - /** @group setParam */ - @Since("2.3.0") - def setInputCols(values: Array[String]): this.type = set(inputCols, values) - - /** @group setParam */ - @Since("2.3.0") - def setOutputCols(values: Array[String]): this.type = set(outputCols, values) - - /** @group setParam */ - @Since("2.3.0") - def setDropLast(value: Boolean): this.type = set(dropLast, value) - - /** @group setParam */ - @Since("2.3.0") - def setHandleInvalid(value: String): this.type = set(handleInvalid, value) - - @Since("2.3.0") - override def transformSchema(schema: StructType): StructType = { - val inputColNames = $(inputCols) - - require(inputColNames.length == categorySizes.length, - s"The number of input columns ${inputColNames.length} must be the same as the number of " + - s"features ${categorySizes.length} during fitting.") - - val keepInvalid = $(handleInvalid) == OneHotEncoderEstimator.KEEP_INVALID - val transformedSchema = validateAndTransformSchema(schema, dropLast = $(dropLast), - keepInvalid = keepInvalid) - verifyNumOfValues(transformedSchema) - } - - /** - * If the metadata of input columns also specifies the number of categories, we need to - * compare with expected category number with `handleInvalid` and `dropLast` taken into - * account. Mismatched numbers will cause exception. - */ - private def verifyNumOfValues(schema: StructType): StructType = { - val configedSizes = getConfigedCategorySizes - $(outputCols).zipWithIndex.foreach { case (outputColName, idx) => - val inputColName = $(inputCols)(idx) - val attrGroup = AttributeGroup.fromStructField(schema(outputColName)) - - // If the input metadata specifies number of category for output column, - // comparing with expected category number with `handleInvalid` and - // `dropLast` taken into account. 
- if (attrGroup.attributes.nonEmpty) { - val numCategories = configedSizes(idx) - require(attrGroup.size == numCategories, "OneHotEncoderModel expected " + - s"$numCategories categorical values for input column $inputColName, " + - s"but the input column had metadata specifying ${attrGroup.size} values.") - } - } - schema - } - - @Since("2.3.0") - override def transform(dataset: Dataset[_]): DataFrame = { - val transformedSchema = transformSchema(dataset.schema, logging = true) - val keepInvalid = $(handleInvalid) == OneHotEncoderEstimator.KEEP_INVALID - - val encodedColumns = $(inputCols).indices.map { idx => - val inputColName = $(inputCols)(idx) - val outputColName = $(outputCols)(idx) - - val outputAttrGroupFromSchema = - AttributeGroup.fromStructField(transformedSchema(outputColName)) - - val metadata = if (outputAttrGroupFromSchema.size < 0) { - OneHotEncoderCommon.createAttrGroupForAttrNames(outputColName, - categorySizes(idx), $(dropLast), keepInvalid).toMetadata() - } else { - outputAttrGroupFromSchema.toMetadata() - } - - encoder(col(inputColName).cast(DoubleType), lit(idx)) - .as(outputColName, metadata) - } - dataset.withColumns($(outputCols), encodedColumns) - } - - @Since("2.3.0") - override def copy(extra: ParamMap): OneHotEncoderModel = { - val copied = new OneHotEncoderModel(uid, categorySizes) - copyValues(copied, extra).setParent(parent) - } - - @Since("2.3.0") - override def write: MLWriter = new OneHotEncoderModelWriter(this) -} - -@Since("2.3.0") -object OneHotEncoderModel extends MLReadable[OneHotEncoderModel] { - - private[OneHotEncoderModel] - class OneHotEncoderModelWriter(instance: OneHotEncoderModel) extends MLWriter { - - private case class Data(categorySizes: Array[Int]) - - override protected def saveImpl(path: String): Unit = { - DefaultParamsWriter.saveMetadata(instance, path, sc) - val data = Data(instance.categorySizes) - val dataPath = new Path(path, "data").toString - sparkSession.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath) - } - } - - private class OneHotEncoderModelReader extends MLReader[OneHotEncoderModel] { - - private val className = classOf[OneHotEncoderModel].getName - - override def load(path: String): OneHotEncoderModel = { - val metadata = DefaultParamsReader.loadMetadata(path, sc, className) - val dataPath = new Path(path, "data").toString - val data = sparkSession.read.parquet(dataPath) - .select("categorySizes") - .head() - val categorySizes = data.getAs[Seq[Int]](0).toArray - val model = new OneHotEncoderModel(metadata.uid, categorySizes) - metadata.getAndSetParams(model) - model - } - } - - @Since("2.3.0") - override def read: MLReader[OneHotEncoderModel] = new OneHotEncoderModelReader - - @Since("2.3.0") - override def load(path: String): OneHotEncoderModel = super.load(path) -} - -/** - * Provides some helper methods used by both `OneHotEncoder` and `OneHotEncoderEstimator`. 
- */ -private[feature] object OneHotEncoderCommon { - - private def genOutputAttrNames(inputCol: StructField): Option[Array[String]] = { - val inputAttr = Attribute.fromStructField(inputCol) - inputAttr match { - case nominal: NominalAttribute => - if (nominal.values.isDefined) { - nominal.values - } else if (nominal.numValues.isDefined) { - nominal.numValues.map(n => Array.tabulate(n)(_.toString)) - } else { - None - } - case binary: BinaryAttribute => - if (binary.values.isDefined) { - binary.values - } else { - Some(Array.tabulate(2)(_.toString)) - } - case _: NumericAttribute => - throw new RuntimeException( - s"The input column ${inputCol.name} cannot be continuous-value.") - case _ => - None // optimistic about unknown attributes - } - } - - /** Creates an `AttributeGroup` filled by the `BinaryAttribute` named as required. */ - private def genOutputAttrGroup( - outputAttrNames: Option[Array[String]], - outputColName: String): AttributeGroup = { - outputAttrNames.map { attrNames => - val attrs: Array[Attribute] = attrNames.map { name => - BinaryAttribute.defaultAttr.withName(name) - } - new AttributeGroup(outputColName, attrs) - }.getOrElse{ - new AttributeGroup(outputColName) - } - } - - /** - * Prepares the `StructField` with proper metadata for `OneHotEncoder`'s output column. - */ - def transformOutputColumnSchema( - inputCol: StructField, - outputColName: String, - dropLast: Boolean, - keepInvalid: Boolean = false): StructField = { - val outputAttrNames = genOutputAttrNames(inputCol) - val filteredOutputAttrNames = outputAttrNames.map { names => - if (dropLast && !keepInvalid) { - require(names.length > 1, - s"The input column ${inputCol.name} should have at least two distinct values.") - names.dropRight(1) - } else if (!dropLast && keepInvalid) { - names ++ Seq("invalidValues") - } else { - names - } - } - - genOutputAttrGroup(filteredOutputAttrNames, outputColName).toStructField() - } - - /** - * This method is called when we want to generate `AttributeGroup` from actual data for - * one-hot encoder. - */ - def getOutputAttrGroupFromData( - dataset: Dataset[_], - inputColNames: Seq[String], - outputColNames: Seq[String], - dropLast: Boolean): Seq[AttributeGroup] = { - // The RDD approach has advantage of early-stop if any values are invalid. It seems that - // DataFrame ops don't have equivalent functions. - val columns = inputColNames.map { inputColName => - col(inputColName).cast(DoubleType) - } - val numOfColumns = columns.length - - val numAttrsArray = dataset.select(columns: _*).rdd.map { row => - (0 until numOfColumns).map(idx => row.getDouble(idx)).toArray - }.treeAggregate(new Array[Double](numOfColumns))( - (maxValues, curValues) => { - (0 until numOfColumns).foreach { idx => - val x = curValues(idx) - assert(x <= Int.MaxValue, - s"OneHotEncoder only supports up to ${Int.MaxValue} indices, but got $x.") - assert(x >= 0.0 && x == x.toInt, - s"Values from column ${inputColNames(idx)} must be indices, but got $x.") - maxValues(idx) = math.max(maxValues(idx), x) - } - maxValues - }, - (m0, m1) => { - (0 until numOfColumns).foreach { idx => - m0(idx) = math.max(m0(idx), m1(idx)) - } - m0 - } - ).map(_.toInt + 1) - - outputColNames.zip(numAttrsArray).map { case (outputColName, numAttrs) => - createAttrGroupForAttrNames(outputColName, numAttrs, dropLast, keepInvalid = false) - } - } - - /** Creates an `AttributeGroup` with the required number of `BinaryAttribute`. 
*/ - def createAttrGroupForAttrNames( - outputColName: String, - numAttrs: Int, - dropLast: Boolean, - keepInvalid: Boolean): AttributeGroup = { - val outputAttrNames = Array.tabulate(numAttrs)(_.toString) - val filtered = if (dropLast && !keepInvalid) { - outputAttrNames.dropRight(1) - } else if (!dropLast && keepInvalid) { - outputAttrNames ++ Seq("invalidValues") - } else { - outputAttrNames - } - genOutputAttrGroup(Some(filtered), outputColName) - } -} diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index 346e1823f00b..d7eb13772aa6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -246,7 +246,7 @@ class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) // Formula w/o intercept, one of the categories in the first category feature is // being used as reference category, we will not drop any category for that feature. if (!hasIntercept && !keepReferenceCategory) { - encoderStages += new OneHotEncoderEstimator(uid) + encoderStages += new OneHotEncoder(uid) .setInputCols(Array(indexed(term))) .setOutputCols(Array(encodedCol)) .setDropLast(false) @@ -269,7 +269,7 @@ class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) if (oneHotEncodeColumns.nonEmpty) { val (inputCols, outputCols) = oneHotEncodeColumns.toArray.unzip - encoderStages += new OneHotEncoderEstimator(uid) + encoderStages += new OneHotEncoder(uid) .setInputCols(inputCols) .setOutputCols(outputCols) .setDropLast(true) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderEstimatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderEstimatorSuite.scala deleted file mode 100644 index d549e1326227..000000000000 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderEstimatorSuite.scala +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.ml.feature - -import org.apache.spark.ml.attribute.{AttributeGroup, BinaryAttribute, NominalAttribute} -import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} -import org.apache.spark.ml.param.ParamsSuite -import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest} -import org.apache.spark.sql.{Encoder, Row} -import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.types._ - -class OneHotEncoderEstimatorSuite extends MLTest with DefaultReadWriteTest { - - import testImplicits._ - - test("params") { - ParamsSuite.checkParams(new OneHotEncoderEstimator) - } - - test("OneHotEncoderEstimator dropLast = false") { - val data = Seq( - Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), - Row(1.0, Vectors.sparse(3, Seq((1, 1.0)))), - Row(2.0, Vectors.sparse(3, Seq((2, 1.0)))), - Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), - Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), - Row(2.0, Vectors.sparse(3, Seq((2, 1.0))))) - - val schema = StructType(Array( - StructField("input", DoubleType), - StructField("expected", new VectorUDT))) - - val df = spark.createDataFrame(sc.parallelize(data), schema) - - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("input")) - .setOutputCols(Array("output")) - assert(encoder.getDropLast === true) - encoder.setDropLast(false) - assert(encoder.getDropLast === false) - val model = encoder.fit(df) - testTransformer[(Double, Vector)](df, model, "output", "expected") { - case Row(output: Vector, expected: Vector) => - assert(output === expected) - } - } - - test("OneHotEncoderEstimator dropLast = true") { - val data = Seq( - Row(0.0, Vectors.sparse(2, Seq((0, 1.0)))), - Row(1.0, Vectors.sparse(2, Seq((1, 1.0)))), - Row(2.0, Vectors.sparse(2, Seq())), - Row(0.0, Vectors.sparse(2, Seq((0, 1.0)))), - Row(0.0, Vectors.sparse(2, Seq((0, 1.0)))), - Row(2.0, Vectors.sparse(2, Seq()))) - - val schema = StructType(Array( - StructField("input", DoubleType), - StructField("expected", new VectorUDT))) - - val df = spark.createDataFrame(sc.parallelize(data), schema) - - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("input")) - .setOutputCols(Array("output")) - - val model = encoder.fit(df) - testTransformer[(Double, Vector)](df, model, "output", "expected") { - case Row(output: Vector, expected: Vector) => - assert(output === expected) - } - } - - test("input column with ML attribute") { - val attr = NominalAttribute.defaultAttr.withValues("small", "medium", "large") - val df = Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply).toDF("size") - .select(col("size").as("size", attr.toMetadata())) - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("size")) - .setOutputCols(Array("encoded")) - val model = encoder.fit(df) - testTransformerByGlobalCheckFunc[(Double)](df, model, "encoded") { rows => - val group = AttributeGroup.fromStructField(rows.head.schema("encoded")) - assert(group.size === 2) - assert(group.getAttr(0) === BinaryAttribute.defaultAttr.withName("small").withIndex(0)) - assert(group.getAttr(1) === BinaryAttribute.defaultAttr.withName("medium").withIndex(1)) - } - } - - test("input column without ML attribute") { - val df = Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply).toDF("index") - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("index")) - .setOutputCols(Array("encoded")) - val model = encoder.fit(df) - testTransformerByGlobalCheckFunc[(Double)](df, model, "encoded") { rows => - val group = 
AttributeGroup.fromStructField(rows.head.schema("encoded")) - assert(group.size === 2) - assert(group.getAttr(0) === BinaryAttribute.defaultAttr.withName("0").withIndex(0)) - assert(group.getAttr(1) === BinaryAttribute.defaultAttr.withName("1").withIndex(1)) - } - } - - test("read/write") { - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("index")) - .setOutputCols(Array("encoded")) - testDefaultReadWrite(encoder) - } - - test("OneHotEncoderModel read/write") { - val instance = new OneHotEncoderModel("myOneHotEncoderModel", Array(1, 2, 3)) - val newInstance = testDefaultReadWrite(instance) - assert(newInstance.categorySizes === instance.categorySizes) - } - - test("OneHotEncoderEstimator with varying types") { - val data = Seq( - Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), - Row(1.0, Vectors.sparse(3, Seq((1, 1.0)))), - Row(2.0, Vectors.sparse(3, Seq((2, 1.0)))), - Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), - Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), - Row(2.0, Vectors.sparse(3, Seq((2, 1.0))))) - - val schema = StructType(Array( - StructField("input", DoubleType), - StructField("expected", new VectorUDT))) - - val df = spark.createDataFrame(sc.parallelize(data), schema) - - class NumericTypeWithEncoder[A](val numericType: NumericType) - (implicit val encoder: Encoder[(A, Vector)]) - - val types = Seq( - new NumericTypeWithEncoder[Short](ShortType), - new NumericTypeWithEncoder[Long](LongType), - new NumericTypeWithEncoder[Int](IntegerType), - new NumericTypeWithEncoder[Float](FloatType), - new NumericTypeWithEncoder[Byte](ByteType), - new NumericTypeWithEncoder[Double](DoubleType), - new NumericTypeWithEncoder[Decimal](DecimalType(10, 0))(ExpressionEncoder())) - - for (t <- types) { - val dfWithTypes = df.select(col("input").cast(t.numericType), col("expected")) - val estimator = new OneHotEncoderEstimator() - .setInputCols(Array("input")) - .setOutputCols(Array("output")) - .setDropLast(false) - - val model = estimator.fit(dfWithTypes) - testTransformer(dfWithTypes, model, "output", "expected") { - case Row(output: Vector, expected: Vector) => - assert(output === expected) - }(t.encoder) - } - } - - test("OneHotEncoderEstimator: encoding multiple columns and dropLast = false") { - val data = Seq( - Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), 2.0, Vectors.sparse(4, Seq((2, 1.0)))), - Row(1.0, Vectors.sparse(3, Seq((1, 1.0))), 3.0, Vectors.sparse(4, Seq((3, 1.0)))), - Row(2.0, Vectors.sparse(3, Seq((2, 1.0))), 0.0, Vectors.sparse(4, Seq((0, 1.0)))), - Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), 1.0, Vectors.sparse(4, Seq((1, 1.0)))), - Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), 0.0, Vectors.sparse(4, Seq((0, 1.0)))), - Row(2.0, Vectors.sparse(3, Seq((2, 1.0))), 2.0, Vectors.sparse(4, Seq((2, 1.0))))) - - val schema = StructType(Array( - StructField("input1", DoubleType), - StructField("expected1", new VectorUDT), - StructField("input2", DoubleType), - StructField("expected2", new VectorUDT))) - - val df = spark.createDataFrame(sc.parallelize(data), schema) - - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("input1", "input2")) - .setOutputCols(Array("output1", "output2")) - assert(encoder.getDropLast === true) - encoder.setDropLast(false) - assert(encoder.getDropLast === false) - - val model = encoder.fit(df) - testTransformer[(Double, Vector, Double, Vector)]( - df, - model, - "output1", - "output2", - "expected1", - "expected2") { - case Row(output1: Vector, output2: Vector, expected1: Vector, expected2: Vector) => - assert(output1 === expected1) - 
assert(output2 === expected2) - } - } - - test("OneHotEncoderEstimator: encoding multiple columns and dropLast = true") { - val data = Seq( - Row(0.0, Vectors.sparse(2, Seq((0, 1.0))), 2.0, Vectors.sparse(3, Seq((2, 1.0)))), - Row(1.0, Vectors.sparse(2, Seq((1, 1.0))), 3.0, Vectors.sparse(3, Seq())), - Row(2.0, Vectors.sparse(2, Seq()), 0.0, Vectors.sparse(3, Seq((0, 1.0)))), - Row(0.0, Vectors.sparse(2, Seq((0, 1.0))), 1.0, Vectors.sparse(3, Seq((1, 1.0)))), - Row(0.0, Vectors.sparse(2, Seq((0, 1.0))), 0.0, Vectors.sparse(3, Seq((0, 1.0)))), - Row(2.0, Vectors.sparse(2, Seq()), 2.0, Vectors.sparse(3, Seq((2, 1.0))))) - - val schema = StructType(Array( - StructField("input1", DoubleType), - StructField("expected1", new VectorUDT), - StructField("input2", DoubleType), - StructField("expected2", new VectorUDT))) - - val df = spark.createDataFrame(sc.parallelize(data), schema) - - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("input1", "input2")) - .setOutputCols(Array("output1", "output2")) - - val model = encoder.fit(df) - testTransformer[(Double, Vector, Double, Vector)]( - df, - model, - "output1", - "output2", - "expected1", - "expected2") { - case Row(output1: Vector, output2: Vector, expected1: Vector, expected2: Vector) => - assert(output1 === expected1) - assert(output2 === expected2) - } - } - - test("Throw error on invalid values") { - val trainingData = Seq((0, 0), (1, 1), (2, 2)) - val trainingDF = trainingData.toDF("id", "a") - val testData = Seq((0, 0), (1, 2), (1, 3)) - val testDF = testData.toDF("id", "a") - - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("a")) - .setOutputCols(Array("encoded")) - - val model = encoder.fit(trainingDF) - testTransformerByInterceptingException[(Int, Int)]( - testDF, - model, - expectedMessagePart = "Unseen value: 3.0. To handle unseen values", - firstResultCol = "encoded") - - } - - test("Can't transform on negative input") { - val trainingDF = Seq((0, 0), (1, 1), (2, 2)).toDF("a", "b") - val testDF = Seq((0, 0), (-1, 2), (1, 3)).toDF("a", "b") - - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("a")) - .setOutputCols(Array("encoded")) - - val model = encoder.fit(trainingDF) - testTransformerByInterceptingException[(Int, Int)]( - testDF, - model, - expectedMessagePart = "Negative value: -1.0. 
Input can't be negative", - firstResultCol = "encoded") - } - - test("Keep on invalid values: dropLast = false") { - val trainingDF = Seq(Tuple1(0), Tuple1(1), Tuple1(2)).toDF("input") - - val testData = Seq( - Row(0.0, Vectors.sparse(4, Seq((0, 1.0)))), - Row(1.0, Vectors.sparse(4, Seq((1, 1.0)))), - Row(3.0, Vectors.sparse(4, Seq((3, 1.0))))) - - val schema = StructType(Array( - StructField("input", DoubleType), - StructField("expected", new VectorUDT))) - - val testDF = spark.createDataFrame(sc.parallelize(testData), schema) - - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("input")) - .setOutputCols(Array("output")) - .setHandleInvalid("keep") - .setDropLast(false) - - val model = encoder.fit(trainingDF) - testTransformer[(Double, Vector)](testDF, model, "output", "expected") { - case Row(output: Vector, expected: Vector) => - assert(output === expected) - } - } - - test("Keep on invalid values: dropLast = true") { - val trainingDF = Seq(Tuple1(0), Tuple1(1), Tuple1(2)).toDF("input") - - val testData = Seq( - Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), - Row(1.0, Vectors.sparse(3, Seq((1, 1.0)))), - Row(3.0, Vectors.sparse(3, Seq()))) - - val schema = StructType(Array( - StructField("input", DoubleType), - StructField("expected", new VectorUDT))) - - val testDF = spark.createDataFrame(sc.parallelize(testData), schema) - - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("input")) - .setOutputCols(Array("output")) - .setHandleInvalid("keep") - .setDropLast(true) - - val model = encoder.fit(trainingDF) - testTransformer[(Double, Vector)](testDF, model, "output", "expected") { - case Row(output: Vector, expected: Vector) => - assert(output === expected) - } - } - - test("OneHotEncoderModel changes dropLast") { - val data = Seq( - Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), Vectors.sparse(2, Seq((0, 1.0)))), - Row(1.0, Vectors.sparse(3, Seq((1, 1.0))), Vectors.sparse(2, Seq((1, 1.0)))), - Row(2.0, Vectors.sparse(3, Seq((2, 1.0))), Vectors.sparse(2, Seq())), - Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), Vectors.sparse(2, Seq((0, 1.0)))), - Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), Vectors.sparse(2, Seq((0, 1.0)))), - Row(2.0, Vectors.sparse(3, Seq((2, 1.0))), Vectors.sparse(2, Seq()))) - - val schema = StructType(Array( - StructField("input", DoubleType), - StructField("expected1", new VectorUDT), - StructField("expected2", new VectorUDT))) - - val df = spark.createDataFrame(sc.parallelize(data), schema) - - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("input")) - .setOutputCols(Array("output")) - - val model = encoder.fit(df) - - model.setDropLast(false) - testTransformer[(Double, Vector, Vector)](df, model, "output", "expected1") { - case Row(output: Vector, expected1: Vector) => - assert(output === expected1) - } - - model.setDropLast(true) - testTransformer[(Double, Vector, Vector)](df, model, "output", "expected2") { - case Row(output: Vector, expected2: Vector) => - assert(output === expected2) - } - } - - test("OneHotEncoderModel changes handleInvalid") { - val trainingDF = Seq(Tuple1(0), Tuple1(1), Tuple1(2)).toDF("input") - - val testData = Seq( - Row(0.0, Vectors.sparse(4, Seq((0, 1.0)))), - Row(1.0, Vectors.sparse(4, Seq((1, 1.0)))), - Row(3.0, Vectors.sparse(4, Seq((3, 1.0))))) - - val schema = StructType(Array( - StructField("input", DoubleType), - StructField("expected", new VectorUDT))) - - val testDF = spark.createDataFrame(sc.parallelize(testData), schema) - - val encoder = new OneHotEncoderEstimator() - 
.setInputCols(Array("input")) - .setOutputCols(Array("output")) - - val model = encoder.fit(trainingDF) - model.setHandleInvalid("error") - - testTransformerByInterceptingException[(Double, Vector)]( - testDF, - model, - expectedMessagePart = "Unseen value: 3.0. To handle unseen values", - firstResultCol = "output") - - model.setHandleInvalid("keep") - testTransformerByGlobalCheckFunc[(Double, Vector)](testDF, model, "output") { _ => } - } - - test("Transforming on mismatched attributes") { - val attr = NominalAttribute.defaultAttr.withValues("small", "medium", "large") - val df = Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply).toDF("size") - .select(col("size").as("size", attr.toMetadata())) - val encoder = new OneHotEncoderEstimator() - .setInputCols(Array("size")) - .setOutputCols(Array("encoded")) - val model = encoder.fit(df) - - val testAttr = NominalAttribute.defaultAttr.withValues("tiny", "small", "medium", "large") - val testDF = Seq(0.0, 1.0, 2.0, 3.0).map(Tuple1.apply).toDF("size") - .select(col("size").as("size", testAttr.toMetadata())) - testTransformerByInterceptingException[(Double)]( - testDF, - model, - expectedMessagePart = "OneHotEncoderModel expected 2 categorical values", - firstResultCol = "encoded") - } -} diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala index 41b32b2ffa09..d92313f4ce03 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala @@ -18,72 +18,71 @@ package org.apache.spark.ml.feature import org.apache.spark.ml.attribute.{AttributeGroup, BinaryAttribute, NominalAttribute} -import org.apache.spark.ml.linalg.Vector -import org.apache.spark.ml.linalg.Vectors +import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest} -import org.apache.spark.sql.{DataFrame, Encoder, Row} +import org.apache.spark.sql.{Encoder, Row} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.functions.col import org.apache.spark.sql.types._ -class OneHotEncoderSuite - extends MLTest with DefaultReadWriteTest { +class OneHotEncoderSuite extends MLTest with DefaultReadWriteTest { import testImplicits._ - def stringIndexed(): DataFrame = { - val data = Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")) - val df = data.toDF("id", "label") - val indexer = new StringIndexer() - .setInputCol("label") - .setOutputCol("labelIndex") - .fit(df) - indexer.transform(df) - } - test("params") { ParamsSuite.checkParams(new OneHotEncoder) } test("OneHotEncoder dropLast = false") { - val transformed = stringIndexed() + val data = Seq( + Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), + Row(1.0, Vectors.sparse(3, Seq((1, 1.0)))), + Row(2.0, Vectors.sparse(3, Seq((2, 1.0)))), + Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), + Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), + Row(2.0, Vectors.sparse(3, Seq((2, 1.0))))) + + val schema = StructType(Array( + StructField("input", DoubleType), + StructField("expected", new VectorUDT))) + + val df = spark.createDataFrame(sc.parallelize(data), schema) + val encoder = new OneHotEncoder() - .setInputCol("labelIndex") - .setOutputCol("labelVec") + .setInputCols(Array("input")) + .setOutputCols(Array("output")) assert(encoder.getDropLast === true) encoder.setDropLast(false) assert(encoder.getDropLast 
=== false) - val expected = Seq( - (0, Vectors.sparse(3, Seq((0, 1.0)))), - (1, Vectors.sparse(3, Seq((2, 1.0)))), - (2, Vectors.sparse(3, Seq((1, 1.0)))), - (3, Vectors.sparse(3, Seq((0, 1.0)))), - (4, Vectors.sparse(3, Seq((0, 1.0)))), - (5, Vectors.sparse(3, Seq((1, 1.0))))).toDF("id", "expected") - - val withExpected = transformed.join(expected, "id") - testTransformer[(Int, String, Double, Vector)](withExpected, encoder, "labelVec", "expected") { + val model = encoder.fit(df) + testTransformer[(Double, Vector)](df, model, "output", "expected") { case Row(output: Vector, expected: Vector) => assert(output === expected) } } test("OneHotEncoder dropLast = true") { - val transformed = stringIndexed() + val data = Seq( + Row(0.0, Vectors.sparse(2, Seq((0, 1.0)))), + Row(1.0, Vectors.sparse(2, Seq((1, 1.0)))), + Row(2.0, Vectors.sparse(2, Seq())), + Row(0.0, Vectors.sparse(2, Seq((0, 1.0)))), + Row(0.0, Vectors.sparse(2, Seq((0, 1.0)))), + Row(2.0, Vectors.sparse(2, Seq()))) + + val schema = StructType(Array( + StructField("input", DoubleType), + StructField("expected", new VectorUDT))) + + val df = spark.createDataFrame(sc.parallelize(data), schema) + val encoder = new OneHotEncoder() - .setInputCol("labelIndex") - .setOutputCol("labelVec") - val expected = Seq( - (0, Vectors.sparse(2, Seq((0, 1.0)))), - (1, Vectors.sparse(2, Seq())), - (2, Vectors.sparse(2, Seq((1, 1.0)))), - (3, Vectors.sparse(2, Seq((0, 1.0)))), - (4, Vectors.sparse(2, Seq((0, 1.0)))), - (5, Vectors.sparse(2, Seq((1, 1.0))))).toDF("id", "expected") - - val withExpected = transformed.join(expected, "id") - testTransformer[(Int, String, Double, Vector)](withExpected, encoder, "labelVec", "expected") { + .setInputCols(Array("input")) + .setOutputCols(Array("output")) + + val model = encoder.fit(df) + testTransformer[(Double, Vector)](df, model, "output", "expected") { case Row(output: Vector, expected: Vector) => assert(output === expected) } @@ -94,52 +93,61 @@ class OneHotEncoderSuite val df = Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply).toDF("size") .select(col("size").as("size", attr.toMetadata())) val encoder = new OneHotEncoder() - .setInputCol("size") - .setOutputCol("encoded") - testTransformerByGlobalCheckFunc[(Double)](df, encoder, "encoded") { rows => - val group = AttributeGroup.fromStructField(rows.head.schema("encoded")) - assert(group.size === 2) - assert(group.getAttr(0) === BinaryAttribute.defaultAttr.withName("small").withIndex(0)) - assert(group.getAttr(1) === BinaryAttribute.defaultAttr.withName("medium").withIndex(1)) + .setInputCols(Array("size")) + .setOutputCols(Array("encoded")) + val model = encoder.fit(df) + testTransformerByGlobalCheckFunc[(Double)](df, model, "encoded") { rows => + val group = AttributeGroup.fromStructField(rows.head.schema("encoded")) + assert(group.size === 2) + assert(group.getAttr(0) === BinaryAttribute.defaultAttr.withName("small").withIndex(0)) + assert(group.getAttr(1) === BinaryAttribute.defaultAttr.withName("medium").withIndex(1)) } } - test("input column without ML attribute") { val df = Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply).toDF("index") val encoder = new OneHotEncoder() - .setInputCol("index") - .setOutputCol("encoded") - val rows = encoder.transform(df).select("encoded").collect() - val group = AttributeGroup.fromStructField(rows.head.schema("encoded")) - assert(group.size === 2) - assert(group.getAttr(0) === BinaryAttribute.defaultAttr.withName("0").withIndex(0)) - assert(group.getAttr(1) === BinaryAttribute.defaultAttr.withName("1").withIndex(1)) + 
.setInputCols(Array("index")) + .setOutputCols(Array("encoded")) + val model = encoder.fit(df) + testTransformerByGlobalCheckFunc[(Double)](df, model, "encoded") { rows => + val group = AttributeGroup.fromStructField(rows.head.schema("encoded")) + assert(group.size === 2) + assert(group.getAttr(0) === BinaryAttribute.defaultAttr.withName("0").withIndex(0)) + assert(group.getAttr(1) === BinaryAttribute.defaultAttr.withName("1").withIndex(1)) + } } test("read/write") { - val t = new OneHotEncoder() - .setInputCol("myInputCol") - .setOutputCol("myOutputCol") - .setDropLast(false) - testDefaultReadWrite(t) + val encoder = new OneHotEncoder() + .setInputCols(Array("index")) + .setOutputCols(Array("encoded")) + testDefaultReadWrite(encoder) + } + + test("OneHotEncoderModel read/write") { + val instance = new OneHotEncoderModel("myOneHotEncoderModel", Array(1, 2, 3)) + val newInstance = testDefaultReadWrite(instance) + assert(newInstance.categorySizes === instance.categorySizes) } test("OneHotEncoder with varying types") { - val df = stringIndexed() - val attr = NominalAttribute.defaultAttr.withValues("small", "medium", "large") - val expected = Seq( - (0, Vectors.sparse(3, Seq((0, 1.0)))), - (1, Vectors.sparse(3, Seq((2, 1.0)))), - (2, Vectors.sparse(3, Seq((1, 1.0)))), - (3, Vectors.sparse(3, Seq((0, 1.0)))), - (4, Vectors.sparse(3, Seq((0, 1.0)))), - (5, Vectors.sparse(3, Seq((1, 1.0))))).toDF("id", "expected") + val data = Seq( + Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), + Row(1.0, Vectors.sparse(3, Seq((1, 1.0)))), + Row(2.0, Vectors.sparse(3, Seq((2, 1.0)))), + Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), + Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), + Row(2.0, Vectors.sparse(3, Seq((2, 1.0))))) - val withExpected = df.join(expected, "id") + val schema = StructType(Array( + StructField("input", DoubleType), + StructField("expected", new VectorUDT))) + + val df = spark.createDataFrame(sc.parallelize(data), schema) class NumericTypeWithEncoder[A](val numericType: NumericType) - (implicit val encoder: Encoder[(A, Vector)]) + (implicit val encoder: Encoder[(A, Vector)]) val types = Seq( new NumericTypeWithEncoder[Short](ShortType), @@ -151,17 +159,264 @@ class OneHotEncoderSuite new NumericTypeWithEncoder[Decimal](DecimalType(10, 0))(ExpressionEncoder())) for (t <- types) { - val dfWithTypes = withExpected.select(col("labelIndex") - .cast(t.numericType).as("labelIndex", attr.toMetadata()), col("expected")) - val encoder = new OneHotEncoder() - .setInputCol("labelIndex") - .setOutputCol("labelVec") + val dfWithTypes = df.select(col("input").cast(t.numericType), col("expected")) + val estimator = new OneHotEncoder() + .setInputCols(Array("input")) + .setOutputCols(Array("output")) .setDropLast(false) - testTransformer(dfWithTypes, encoder, "labelVec", "expected") { + val model = estimator.fit(dfWithTypes) + testTransformer(dfWithTypes, model, "output", "expected") { case Row(output: Vector, expected: Vector) => assert(output === expected) }(t.encoder) } } + + test("OneHotEncoder: encoding multiple columns and dropLast = false") { + val data = Seq( + Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), 2.0, Vectors.sparse(4, Seq((2, 1.0)))), + Row(1.0, Vectors.sparse(3, Seq((1, 1.0))), 3.0, Vectors.sparse(4, Seq((3, 1.0)))), + Row(2.0, Vectors.sparse(3, Seq((2, 1.0))), 0.0, Vectors.sparse(4, Seq((0, 1.0)))), + Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), 1.0, Vectors.sparse(4, Seq((1, 1.0)))), + Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), 0.0, Vectors.sparse(4, Seq((0, 1.0)))), + Row(2.0, Vectors.sparse(3, 
Seq((2, 1.0))), 2.0, Vectors.sparse(4, Seq((2, 1.0))))) + + val schema = StructType(Array( + StructField("input1", DoubleType), + StructField("expected1", new VectorUDT), + StructField("input2", DoubleType), + StructField("expected2", new VectorUDT))) + + val df = spark.createDataFrame(sc.parallelize(data), schema) + + val encoder = new OneHotEncoder() + .setInputCols(Array("input1", "input2")) + .setOutputCols(Array("output1", "output2")) + assert(encoder.getDropLast === true) + encoder.setDropLast(false) + assert(encoder.getDropLast === false) + + val model = encoder.fit(df) + testTransformer[(Double, Vector, Double, Vector)]( + df, + model, + "output1", + "output2", + "expected1", + "expected2") { + case Row(output1: Vector, output2: Vector, expected1: Vector, expected2: Vector) => + assert(output1 === expected1) + assert(output2 === expected2) + } + } + + test("OneHotEncoder: encoding multiple columns and dropLast = true") { + val data = Seq( + Row(0.0, Vectors.sparse(2, Seq((0, 1.0))), 2.0, Vectors.sparse(3, Seq((2, 1.0)))), + Row(1.0, Vectors.sparse(2, Seq((1, 1.0))), 3.0, Vectors.sparse(3, Seq())), + Row(2.0, Vectors.sparse(2, Seq()), 0.0, Vectors.sparse(3, Seq((0, 1.0)))), + Row(0.0, Vectors.sparse(2, Seq((0, 1.0))), 1.0, Vectors.sparse(3, Seq((1, 1.0)))), + Row(0.0, Vectors.sparse(2, Seq((0, 1.0))), 0.0, Vectors.sparse(3, Seq((0, 1.0)))), + Row(2.0, Vectors.sparse(2, Seq()), 2.0, Vectors.sparse(3, Seq((2, 1.0))))) + + val schema = StructType(Array( + StructField("input1", DoubleType), + StructField("expected1", new VectorUDT), + StructField("input2", DoubleType), + StructField("expected2", new VectorUDT))) + + val df = spark.createDataFrame(sc.parallelize(data), schema) + + val encoder = new OneHotEncoder() + .setInputCols(Array("input1", "input2")) + .setOutputCols(Array("output1", "output2")) + + val model = encoder.fit(df) + testTransformer[(Double, Vector, Double, Vector)]( + df, + model, + "output1", + "output2", + "expected1", + "expected2") { + case Row(output1: Vector, output2: Vector, expected1: Vector, expected2: Vector) => + assert(output1 === expected1) + assert(output2 === expected2) + } + } + + test("Throw error on invalid values") { + val trainingData = Seq((0, 0), (1, 1), (2, 2)) + val trainingDF = trainingData.toDF("id", "a") + val testData = Seq((0, 0), (1, 2), (1, 3)) + val testDF = testData.toDF("id", "a") + + val encoder = new OneHotEncoder() + .setInputCols(Array("a")) + .setOutputCols(Array("encoded")) + + val model = encoder.fit(trainingDF) + testTransformerByInterceptingException[(Int, Int)]( + testDF, + model, + expectedMessagePart = "Unseen value: 3.0. To handle unseen values", + firstResultCol = "encoded") + + } + + test("Can't transform on negative input") { + val trainingDF = Seq((0, 0), (1, 1), (2, 2)).toDF("a", "b") + val testDF = Seq((0, 0), (-1, 2), (1, 3)).toDF("a", "b") + + val encoder = new OneHotEncoder() + .setInputCols(Array("a")) + .setOutputCols(Array("encoded")) + + val model = encoder.fit(trainingDF) + testTransformerByInterceptingException[(Int, Int)]( + testDF, + model, + expectedMessagePart = "Negative value: -1.0. 
Input can't be negative", + firstResultCol = "encoded") + } + + test("Keep on invalid values: dropLast = false") { + val trainingDF = Seq(Tuple1(0), Tuple1(1), Tuple1(2)).toDF("input") + + val testData = Seq( + Row(0.0, Vectors.sparse(4, Seq((0, 1.0)))), + Row(1.0, Vectors.sparse(4, Seq((1, 1.0)))), + Row(3.0, Vectors.sparse(4, Seq((3, 1.0))))) + + val schema = StructType(Array( + StructField("input", DoubleType), + StructField("expected", new VectorUDT))) + + val testDF = spark.createDataFrame(sc.parallelize(testData), schema) + + val encoder = new OneHotEncoder() + .setInputCols(Array("input")) + .setOutputCols(Array("output")) + .setHandleInvalid("keep") + .setDropLast(false) + + val model = encoder.fit(trainingDF) + testTransformer[(Double, Vector)](testDF, model, "output", "expected") { + case Row(output: Vector, expected: Vector) => + assert(output === expected) + } + } + + test("Keep on invalid values: dropLast = true") { + val trainingDF = Seq(Tuple1(0), Tuple1(1), Tuple1(2)).toDF("input") + + val testData = Seq( + Row(0.0, Vectors.sparse(3, Seq((0, 1.0)))), + Row(1.0, Vectors.sparse(3, Seq((1, 1.0)))), + Row(3.0, Vectors.sparse(3, Seq()))) + + val schema = StructType(Array( + StructField("input", DoubleType), + StructField("expected", new VectorUDT))) + + val testDF = spark.createDataFrame(sc.parallelize(testData), schema) + + val encoder = new OneHotEncoder() + .setInputCols(Array("input")) + .setOutputCols(Array("output")) + .setHandleInvalid("keep") + .setDropLast(true) + + val model = encoder.fit(trainingDF) + testTransformer[(Double, Vector)](testDF, model, "output", "expected") { + case Row(output: Vector, expected: Vector) => + assert(output === expected) + } + } + + test("OneHotEncoderModel changes dropLast") { + val data = Seq( + Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), Vectors.sparse(2, Seq((0, 1.0)))), + Row(1.0, Vectors.sparse(3, Seq((1, 1.0))), Vectors.sparse(2, Seq((1, 1.0)))), + Row(2.0, Vectors.sparse(3, Seq((2, 1.0))), Vectors.sparse(2, Seq())), + Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), Vectors.sparse(2, Seq((0, 1.0)))), + Row(0.0, Vectors.sparse(3, Seq((0, 1.0))), Vectors.sparse(2, Seq((0, 1.0)))), + Row(2.0, Vectors.sparse(3, Seq((2, 1.0))), Vectors.sparse(2, Seq()))) + + val schema = StructType(Array( + StructField("input", DoubleType), + StructField("expected1", new VectorUDT), + StructField("expected2", new VectorUDT))) + + val df = spark.createDataFrame(sc.parallelize(data), schema) + + val encoder = new OneHotEncoder() + .setInputCols(Array("input")) + .setOutputCols(Array("output")) + + val model = encoder.fit(df) + + model.setDropLast(false) + testTransformer[(Double, Vector, Vector)](df, model, "output", "expected1") { + case Row(output: Vector, expected1: Vector) => + assert(output === expected1) + } + + model.setDropLast(true) + testTransformer[(Double, Vector, Vector)](df, model, "output", "expected2") { + case Row(output: Vector, expected2: Vector) => + assert(output === expected2) + } + } + + test("OneHotEncoderModel changes handleInvalid") { + val trainingDF = Seq(Tuple1(0), Tuple1(1), Tuple1(2)).toDF("input") + + val testData = Seq( + Row(0.0, Vectors.sparse(4, Seq((0, 1.0)))), + Row(1.0, Vectors.sparse(4, Seq((1, 1.0)))), + Row(3.0, Vectors.sparse(4, Seq((3, 1.0))))) + + val schema = StructType(Array( + StructField("input", DoubleType), + StructField("expected", new VectorUDT))) + + val testDF = spark.createDataFrame(sc.parallelize(testData), schema) + + val encoder = new OneHotEncoder() + .setInputCols(Array("input")) + 
.setOutputCols(Array("output")) + + val model = encoder.fit(trainingDF) + model.setHandleInvalid("error") + + testTransformerByInterceptingException[(Double, Vector)]( + testDF, + model, + expectedMessagePart = "Unseen value: 3.0. To handle unseen values", + firstResultCol = "output") + + model.setHandleInvalid("keep") + testTransformerByGlobalCheckFunc[(Double, Vector)](testDF, model, "output") { _ => } + } + + test("Transforming on mismatched attributes") { + val attr = NominalAttribute.defaultAttr.withValues("small", "medium", "large") + val df = Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply).toDF("size") + .select(col("size").as("size", attr.toMetadata())) + val encoder = new OneHotEncoder() + .setInputCols(Array("size")) + .setOutputCols(Array("encoded")) + val model = encoder.fit(df) + + val testAttr = NominalAttribute.defaultAttr.withValues("tiny", "small", "medium", "large") + val testDF = Seq(0.0, 1.0, 2.0, 3.0).map(Tuple1.apply).toDF("size") + .select(col("size").as("size", testAttr.toMetadata())) + testTransformerByInterceptingException[(Double)]( + testDF, + model, + expectedMessagePart = "OneHotEncoderModel expected 2 categorical values", + firstResultCol = "encoded") + } } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 3fabec0f6012..5e97d826370f 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -228,6 +228,18 @@ object MimaExcludes { ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.v2.writer.DataWriterFactory.createWriter"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.writer.streaming.StreamWriter"), + // [SPARK-26133][ML] Remove deprecated OneHotEncoder and rename OneHotEncoderEstimator to OneHotEncoder + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.ml.feature.OneHotEncoderEstimator"), + ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.ml.feature.OneHotEncoder"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.feature.OneHotEncoder.transform"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.feature.OneHotEncoder.getInputCol"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.feature.OneHotEncoder.getOutputCol"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.feature.OneHotEncoder.inputCol"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.feature.OneHotEncoder.setInputCol"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.feature.OneHotEncoder.setOutputCol"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.feature.OneHotEncoder.outputCol"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.ml.feature.OneHotEncoderEstimator$"), + // [SPARK-26141] Enable custom metrics implementation in shuffle write // Following are Java private classes ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.shuffle.sort.UnsafeShuffleWriter.this"), diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 3d2370024259..6cc80e181e5e 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -44,8 +44,7 @@ 'MinMaxScaler', 'MinMaxScalerModel', 'NGram', 'Normalizer', - 'OneHotEncoder', - 'OneHotEncoderEstimator', 'OneHotEncoderModel', + 'OneHotEncoder', 'OneHotEncoderModel', 'PCA', 'PCAModel', 'PolynomialExpansion', 'QuantileDiscretizer', @@ -1642,91 +1641,8 @@ def getP(self): @inherit_doc -class 
OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): - """ - A one-hot encoder that maps a column of category indices to a - column of binary vectors, with at most a single one-value per row - that indicates the input category index. - For example with 5 categories, an input value of 2.0 would map to - an output vector of `[0.0, 0.0, 1.0, 0.0]`. - The last category is not included by default (configurable via - :py:attr:`dropLast`) because it makes the vector entries sum up to - one, and hence linearly dependent. - So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`. - - .. note:: This is different from scikit-learn's OneHotEncoder, - which keeps all categories. The output vectors are sparse. - - .. note:: Deprecated in 2.3.0. :py:class:`OneHotEncoderEstimator` will be renamed to - :py:class:`OneHotEncoder` and this :py:class:`OneHotEncoder` will be removed in 3.0.0. - - .. seealso:: - - :py:class:`StringIndexer` for converting categorical values into - category indices - - >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") - >>> model = stringIndexer.fit(stringIndDf) - >>> td = model.transform(stringIndDf) - >>> encoder = OneHotEncoder(inputCol="indexed", outputCol="features") - >>> encoder.transform(td).head().features - SparseVector(2, {0: 1.0}) - >>> encoder.setParams(outputCol="freqs").transform(td).head().freqs - SparseVector(2, {0: 1.0}) - >>> params = {encoder.dropLast: False, encoder.outputCol: "test"} - >>> encoder.transform(td, params).head().test - SparseVector(3, {0: 1.0}) - >>> onehotEncoderPath = temp_path + "/onehot-encoder" - >>> encoder.save(onehotEncoderPath) - >>> loadedEncoder = OneHotEncoder.load(onehotEncoderPath) - >>> loadedEncoder.getDropLast() == encoder.getDropLast() - True - - .. versionadded:: 1.4.0 - """ - - dropLast = Param(Params._dummy(), "dropLast", "whether to drop the last category", - typeConverter=TypeConverters.toBoolean) - - @keyword_only - def __init__(self, dropLast=True, inputCol=None, outputCol=None): - """ - __init__(self, dropLast=True, inputCol=None, outputCol=None) - """ - super(OneHotEncoder, self).__init__() - self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.OneHotEncoder", self.uid) - self._setDefault(dropLast=True) - kwargs = self._input_kwargs - self.setParams(**kwargs) - - @keyword_only - @since("1.4.0") - def setParams(self, dropLast=True, inputCol=None, outputCol=None): - """ - setParams(self, dropLast=True, inputCol=None, outputCol=None) - Sets params for this OneHotEncoder. - """ - kwargs = self._input_kwargs - return self._set(**kwargs) - - @since("1.4.0") - def setDropLast(self, value): - """ - Sets the value of :py:attr:`dropLast`. - """ - return self._set(dropLast=value) - - @since("1.4.0") - def getDropLast(self): - """ - Gets the value of dropLast or its default value. - """ - return self.getOrDefault(self.dropLast) - - -@inherit_doc -class OneHotEncoderEstimator(JavaEstimator, HasInputCols, HasOutputCols, HasHandleInvalid, - JavaMLReadable, JavaMLWritable): +class OneHotEncoder(JavaEstimator, HasInputCols, HasOutputCols, HasHandleInvalid, + JavaMLReadable, JavaMLWritable): """ A one-hot encoder that maps a column of category indices to a column of binary vectors, with at most a single one-value per row that indicates the input category index. 
@@ -1751,13 +1667,13 @@ class OneHotEncoderEstimator(JavaEstimator, HasInputCols, HasOutputCols, HasHand >>> from pyspark.ml.linalg import Vectors >>> df = spark.createDataFrame([(0.0,), (1.0,), (2.0,)], ["input"]) - >>> ohe = OneHotEncoderEstimator(inputCols=["input"], outputCols=["output"]) + >>> ohe = OneHotEncoder(inputCols=["input"], outputCols=["output"]) >>> model = ohe.fit(df) >>> model.transform(df).head().output SparseVector(2, {0: 1.0}) >>> ohePath = temp_path + "/oheEstimator" >>> ohe.save(ohePath) - >>> loadedOHE = OneHotEncoderEstimator.load(ohePath) + >>> loadedOHE = OneHotEncoder.load(ohePath) >>> loadedOHE.getInputCols() == ohe.getInputCols() True >>> modelPath = temp_path + "/ohe-model" @@ -1784,9 +1700,9 @@ def __init__(self, inputCols=None, outputCols=None, handleInvalid="error", dropL """ __init__(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True) """ - super(OneHotEncoderEstimator, self).__init__() + super(OneHotEncoder, self).__init__() self._java_obj = self._new_java_obj( - "org.apache.spark.ml.feature.OneHotEncoderEstimator", self.uid) + "org.apache.spark.ml.feature.OneHotEncoder", self.uid) self._setDefault(handleInvalid="error", dropLast=True) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1796,7 +1712,7 @@ def __init__(self, inputCols=None, outputCols=None, handleInvalid="error", dropL def setParams(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True): """ setParams(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True) - Sets params for this OneHotEncoderEstimator. + Sets params for this OneHotEncoder. """ kwargs = self._input_kwargs return self._set(**kwargs) @@ -1821,7 +1737,7 @@ def _create_model(self, java_model): class OneHotEncoderModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - Model fitted by :py:class:`OneHotEncoderEstimator`. + Model fitted by :py:class:`OneHotEncoder`. .. versionadded:: 2.3.0 """ From 7a83d71403edf7d24fa5efc0ef913f3ce76d88b8 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 29 Nov 2018 22:15:12 +0800 Subject: [PATCH 0013/1072] [SPARK-26163][SQL] Parsing decimals from JSON using locale ## What changes were proposed in this pull request? In the PR, I propose using of the locale option to parse (and infer) decimals from JSON input. After the changes, `JacksonParser` converts input string to `BigDecimal` and to Spark's Decimal by using `java.text.DecimalFormat`. New behaviour can be switched off via SQL config `spark.sql.legacy.decimalParsing.enabled`. ## How was this patch tested? Added 2 tests to `JsonExpressionsSuite` for the `en-US`, `ko-KR`, `ru-RU`, `de-DE` locales: - Inferring decimal type using locale from JSON field values - Converting JSON field values to specified decimal type using the locales. Closes #23132 from MaxGekk/json-decimal-parsing-locale. 
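For illustration, here is a minimal standalone sketch of the locale-aware parsing approach described above, mirroring the `ExprUtils.getDecimalParser` helper added in this patch. The object name, `main` method and sample input are illustrative only, and the real helper additionally special-cases `Locale.US` for backward compatibility (plain `java.math.BigDecimal` parsing after stripping commas).

```scala
import java.text.{DecimalFormat, DecimalFormatSymbols, ParsePosition}
import java.util.Locale

object LocaleDecimalParsingSketch {
  // Builds a parser that reads decimals written with the given locale's symbols
  // (e.g. ',' as the decimal separator for de-DE) and returns java.math.BigDecimal.
  // Input that is not fully consumed by the format is rejected.
  def decimalParser(locale: Locale): String => java.math.BigDecimal = {
    val format = new DecimalFormat("", new DecimalFormatSymbols(locale))
    format.setParseBigDecimal(true)
    (s: String) => {
      val pos = new ParsePosition(0)
      val result = format.parse(s, pos).asInstanceOf[java.math.BigDecimal]
      if (pos.getIndex != s.length || pos.getErrorIndex != -1) {
        throw new IllegalArgumentException(s"Cannot parse '$s' as a decimal for locale $locale")
      }
      result
    }
  }

  def main(args: Array[String]): Unit = {
    // In de-DE, ',' is the decimal separator, so "1000,5" parses to 1000.5.
    val german = decimalParser(Locale.GERMANY)
    println(german("1000,5"))
    // Strings that do not match the locale's format fail the full-consumption
    // check above instead of being silently truncated.
  }
}
```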
Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/expressions/ExprUtils.scala | 21 +++++ .../expressions/jsonExpressions.scala | 7 +- .../sql/catalyst/json/JacksonParser.scala | 6 ++ .../sql/catalyst/json/JsonInferSchema.scala | 89 +++++++++++-------- .../expressions/JsonExpressionsSuite.scala | 42 ++++++++- .../datasources/json/JsonDataSource.scala | 4 +- .../datasources/json/JsonSuite.scala | 15 ++-- 7 files changed, 132 insertions(+), 52 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala index 89e9071324ef..3f3d6b2b63a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql.catalyst.expressions +import java.text.{DecimalFormat, DecimalFormatSymbols, ParsePosition} +import java.util.Locale + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.util.ArrayBasedMapData import org.apache.spark.sql.types.{DataType, MapType, StringType, StructType} @@ -83,4 +86,22 @@ object ExprUtils { } } } + + def getDecimalParser(locale: Locale): String => java.math.BigDecimal = { + if (locale == Locale.US) { // Special handling the default locale for backward compatibility + (s: String) => new java.math.BigDecimal(s.replaceAll(",", "")) + } else { + val decimalFormat = new DecimalFormat("", new DecimalFormatSymbols(locale)) + decimalFormat.setParseBigDecimal(true) + (s: String) => { + val pos = new ParsePosition(0) + val result = decimalFormat.parse(s, pos).asInstanceOf[java.math.BigDecimal] + if (pos.getIndex() != s.length() || pos.getErrorIndex() != -1) { + throw new IllegalArgumentException("Cannot parse any decimal"); + } else { + result + } + } + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 47304d835fdf..e0cab537ce1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -23,12 +23,10 @@ import scala.util.parsing.combinator.RegexParsers import com.fasterxml.jackson.core._ -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.json._ -import org.apache.spark.sql.catalyst.json.JsonInferSchema.inferField import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -775,6 +773,9 @@ case class SchemaOfJson( factory } + @transient + private lazy val jsonInferSchema = new JsonInferSchema(jsonOptions) + @transient private lazy val json = child.eval().asInstanceOf[UTF8String] @@ -787,7 +788,7 @@ case class SchemaOfJson( override def eval(v: InternalRow): Any = { val dt = Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, json)) { parser => parser.nextToken() - inferField(parser, jsonOptions) + jsonInferSchema.inferField(parser) } UTF8String.fromString(dt.catalogString) diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 92517aac053b..2357595906b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -29,6 +29,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -135,6 +136,8 @@ class JacksonParser( } } + private val decimalParser = ExprUtils.getDecimalParser(options.locale) + /** * Create a converter which converts the JSON documents held by the `JsonParser` * to a value according to a desired schema. @@ -261,6 +264,9 @@ class JacksonParser( (parser: JsonParser) => parseJsonToken[Decimal](parser, dataType) { case (VALUE_NUMBER_INT | VALUE_NUMBER_FLOAT) => Decimal(parser.getDecimalValue, dt.precision, dt.scale) + case VALUE_STRING if parser.getTextLength >= 1 => + val bigDecimal = decimalParser(parser.getText) + Decimal(bigDecimal, dt.precision, dt.scale) } case st: StructType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index 9999a005106f..263e05de3207 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -19,18 +19,23 @@ package org.apache.spark.sql.catalyst.json import java.util.Comparator +import scala.util.control.Exception.allCatch + import com.fasterxml.jackson.core._ import org.apache.spark.SparkException import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.analysis.TypeCoercion +import org.apache.spark.sql.catalyst.expressions.ExprUtils import org.apache.spark.sql.catalyst.json.JacksonUtils.nextUntil import org.apache.spark.sql.catalyst.util.{DropMalformedMode, FailFastMode, ParseMode, PermissiveMode} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils -private[sql] object JsonInferSchema { +private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { + + private val decimalParser = ExprUtils.getDecimalParser(options.locale) /** * Infer the type of a collection of json records in three stages: @@ -40,21 +45,20 @@ private[sql] object JsonInferSchema { */ def infer[T]( json: RDD[T], - configOptions: JSONOptions, createParser: (JsonFactory, T) => JsonParser): StructType = { - val parseMode = configOptions.parseMode - val columnNameOfCorruptRecord = configOptions.columnNameOfCorruptRecord + val parseMode = options.parseMode + val columnNameOfCorruptRecord = options.columnNameOfCorruptRecord // In each RDD partition, perform schema inference on each row and merge afterwards. 
- val typeMerger = compatibleRootType(columnNameOfCorruptRecord, parseMode) + val typeMerger = JsonInferSchema.compatibleRootType(columnNameOfCorruptRecord, parseMode) val mergedTypesFromPartitions = json.mapPartitions { iter => val factory = new JsonFactory() - configOptions.setJacksonOptions(factory) + options.setJacksonOptions(factory) iter.flatMap { row => try { Utils.tryWithResource(createParser(factory, row)) { parser => parser.nextToken() - Some(inferField(parser, configOptions)) + Some(inferField(parser)) } } catch { case e @ (_: RuntimeException | _: JsonProcessingException) => parseMode match { @@ -82,7 +86,7 @@ private[sql] object JsonInferSchema { } json.sparkContext.runJob(mergedTypesFromPartitions, foldPartition, mergeResult) - canonicalizeType(rootType, configOptions) match { + canonicalizeType(rootType, options) match { case Some(st: StructType) => st case _ => // canonicalizeType erases all empty structs, including the only one we want to keep @@ -90,34 +94,17 @@ private[sql] object JsonInferSchema { } } - private[this] val structFieldComparator = new Comparator[StructField] { - override def compare(o1: StructField, o2: StructField): Int = { - o1.name.compareTo(o2.name) - } - } - - private def isSorted(arr: Array[StructField]): Boolean = { - var i: Int = 0 - while (i < arr.length - 1) { - if (structFieldComparator.compare(arr(i), arr(i + 1)) > 0) { - return false - } - i += 1 - } - true - } - /** * Infer the type of a json document from the parser's token stream */ - def inferField(parser: JsonParser, configOptions: JSONOptions): DataType = { + def inferField(parser: JsonParser): DataType = { import com.fasterxml.jackson.core.JsonToken._ parser.getCurrentToken match { case null | VALUE_NULL => NullType case FIELD_NAME => parser.nextToken() - inferField(parser, configOptions) + inferField(parser) case VALUE_STRING if parser.getTextLength < 1 => // Zero length strings and nulls have special handling to deal @@ -128,18 +115,25 @@ private[sql] object JsonInferSchema { // record fields' types have been combined. NullType + case VALUE_STRING if options.prefersDecimal => + val decimalTry = allCatch opt { + val bigDecimal = decimalParser(parser.getText) + DecimalType(bigDecimal.precision, bigDecimal.scale) + } + decimalTry.getOrElse(StringType) case VALUE_STRING => StringType + case START_OBJECT => val builder = Array.newBuilder[StructField] while (nextUntil(parser, END_OBJECT)) { builder += StructField( parser.getCurrentName, - inferField(parser, configOptions), + inferField(parser), nullable = true) } val fields: Array[StructField] = builder.result() // Note: other code relies on this sorting for correctness, so don't remove it! - java.util.Arrays.sort(fields, structFieldComparator) + java.util.Arrays.sort(fields, JsonInferSchema.structFieldComparator) StructType(fields) case START_ARRAY => @@ -148,15 +142,15 @@ private[sql] object JsonInferSchema { // the type as we pass through all JSON objects. 
var elementType: DataType = NullType while (nextUntil(parser, END_ARRAY)) { - elementType = compatibleType( - elementType, inferField(parser, configOptions)) + elementType = JsonInferSchema.compatibleType( + elementType, inferField(parser)) } ArrayType(elementType) - case (VALUE_NUMBER_INT | VALUE_NUMBER_FLOAT) if configOptions.primitivesAsString => StringType + case (VALUE_NUMBER_INT | VALUE_NUMBER_FLOAT) if options.primitivesAsString => StringType - case (VALUE_TRUE | VALUE_FALSE) if configOptions.primitivesAsString => StringType + case (VALUE_TRUE | VALUE_FALSE) if options.primitivesAsString => StringType case VALUE_NUMBER_INT | VALUE_NUMBER_FLOAT => import JsonParser.NumberType._ @@ -172,7 +166,7 @@ private[sql] object JsonInferSchema { } else { DoubleType } - case FLOAT | DOUBLE if configOptions.prefersDecimal => + case FLOAT | DOUBLE if options.prefersDecimal => val v = parser.getDecimalValue if (Math.max(v.precision(), v.scale()) <= DecimalType.MAX_PRECISION) { DecimalType(Math.max(v.precision(), v.scale()), v.scale()) @@ -217,12 +211,31 @@ private[sql] object JsonInferSchema { case other => Some(other) } +} + +object JsonInferSchema { + val structFieldComparator = new Comparator[StructField] { + override def compare(o1: StructField, o2: StructField): Int = { + o1.name.compareTo(o2.name) + } + } + + def isSorted(arr: Array[StructField]): Boolean = { + var i: Int = 0 + while (i < arr.length - 1) { + if (structFieldComparator.compare(arr(i), arr(i + 1)) > 0) { + return false + } + i += 1 + } + true + } - private def withCorruptField( + def withCorruptField( struct: StructType, other: DataType, columnNameOfCorruptRecords: String, - parseMode: ParseMode) = parseMode match { + parseMode: ParseMode): StructType = parseMode match { case PermissiveMode => // If we see any other data type at the root level, we get records that cannot be // parsed. So, we use the struct as the data type and add the corrupt field to the schema. @@ -230,7 +243,7 @@ private[sql] object JsonInferSchema { // If this given struct does not have a column used for corrupt records, // add this field. val newFields: Array[StructField] = - StructField(columnNameOfCorruptRecords, StringType, nullable = true) +: struct.fields + StructField(columnNameOfCorruptRecords, StringType, nullable = true) +: struct.fields // Note: other code relies on this sorting for correctness, so don't remove it! 
java.util.Arrays.sort(newFields, structFieldComparator) StructType(newFields) @@ -253,7 +266,7 @@ private[sql] object JsonInferSchema { /** * Remove top-level ArrayType wrappers and merge the remaining schemas */ - private def compatibleRootType( + def compatibleRootType( columnNameOfCorruptRecords: String, parseMode: ParseMode): (DataType, DataType) => DataType = { // Since we support array of json objects at the top level, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 9b89a27c2377..5d60cefc1389 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import java.text.SimpleDateFormat +import java.text.{DecimalFormat, DecimalFormatSymbols, SimpleDateFormat} import java.util.{Calendar, Locale} import org.scalatest.exceptions.TestFailedException @@ -765,4 +765,44 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with timeZoneId = gmtId), expectedErrMsg = "The field for corrupt records must be string type and nullable") } + + def decimalInput(langTag: String): (Decimal, String) = { + val decimalVal = new java.math.BigDecimal("1000.001") + val decimalType = new DecimalType(10, 5) + val expected = Decimal(decimalVal, decimalType.precision, decimalType.scale) + val decimalFormat = new DecimalFormat("", + new DecimalFormatSymbols(Locale.forLanguageTag(langTag))) + val input = s"""{"d": "${decimalFormat.format(expected.toBigDecimal)}"}""" + + (expected, input) + } + + test("parse decimals using locale") { + def checkDecimalParsing(langTag: String): Unit = { + val schema = new StructType().add("d", DecimalType(10, 5)) + val options = Map("locale" -> langTag) + val (expected, input) = decimalInput(langTag) + + checkEvaluation( + JsonToStructs(schema, options, Literal.create(input), gmtId), + InternalRow(expected)) + } + + Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalParsing) + } + + test("inferring the decimal type using locale") { + def checkDecimalInfer(langTag: String, expectedType: String): Unit = { + val options = Map("locale" -> langTag, "prefersDecimal" -> "true") + val (_, input) = decimalInput(langTag) + + checkEvaluation( + SchemaOfJson(Literal.create(input), options), + expectedType) + } + + Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach { + checkDecimalInfer(_, """struct""") + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala index c7608e2e881f..456f08a2a2ee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala @@ -107,7 +107,7 @@ object TextInputJsonDataSource extends JsonDataSource { }.getOrElse(CreateJacksonParser.internalRow(_: JsonFactory, _: InternalRow)) SQLExecution.withSQLConfPropagated(json.sparkSession) { - JsonInferSchema.infer(rdd, parsedOptions, rowParser) + new JsonInferSchema(parsedOptions).infer(rdd, rowParser) } } @@ -166,7 +166,7 @@ object MultiLineJsonDataSource extends JsonDataSource { .getOrElse(createParser(_: JsonFactory, _: 
PortableDataStream)) SQLExecution.withSQLConfPropagated(sparkSession) { - JsonInferSchema.infer[PortableDataStream](sampled, parsedOptions, parser) + new JsonInferSchema(parsedOptions).infer[PortableDataStream](sampled, parser) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 9ea9189cdf7f..ee31077e12ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -31,8 +31,7 @@ import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.{SparkException, TestUtils} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{functions => F, _} -import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JsonInferSchema, JSONOptions} -import org.apache.spark.sql.catalyst.json.JsonInferSchema.compatibleType +import org.apache.spark.sql.catalyst.json._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.ExternalRDD import org.apache.spark.sql.execution.datasources.DataSource @@ -118,10 +117,10 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { test("Get compatible type") { def checkDataType(t1: DataType, t2: DataType, expected: DataType) { - var actual = compatibleType(t1, t2) + var actual = JsonInferSchema.compatibleType(t1, t2) assert(actual == expected, s"Expected $expected as the most general data type for $t1 and $t2, found $actual") - actual = compatibleType(t2, t1) + actual = JsonInferSchema.compatibleType(t2, t1) assert(actual == expected, s"Expected $expected as the most general data type for $t1 and $t2, found $actual") } @@ -1373,9 +1372,9 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { test("SPARK-6245 JsonInferSchema.infer on empty RDD") { // This is really a test that it doesn't throw an exception - val emptySchema = JsonInferSchema.infer( + val options = new JSONOptions(Map.empty[String, String], "GMT") + val emptySchema = new JsonInferSchema(options).infer( empty.rdd, - new JSONOptions(Map.empty[String, String], "GMT"), CreateJacksonParser.string) assert(StructType(Seq()) === emptySchema) } @@ -1400,9 +1399,9 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { } test("SPARK-8093 Erase empty structs") { - val emptySchema = JsonInferSchema.infer( + val options = new JSONOptions(Map.empty[String, String], "GMT") + val emptySchema = new JsonInferSchema(options).infer( emptyRecords.rdd, - new JSONOptions(Map.empty[String, String], "GMT"), CreateJacksonParser.string) assert(StructType(Seq()) === emptySchema) } From b9b68a6dc7d0f735163e980392ea957f2d589923 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 29 Nov 2018 22:37:02 +0800 Subject: [PATCH 0014/1072] [SPARK-26211][SQL] Fix InSet for binary, and struct and array with null. ## What changes were proposed in this pull request? Currently `InSet` doesn't work properly for binary type, or struct and array type with null value in the set. Because, as for binary type, the `HashSet` doesn't work properly for `Array[Byte]`, and as for struct and array type with null value in the set, the `ordering` will throw a `NPE`. ## How was this patch tested? Added a few tests. Closes #23176 from ueshin/issues/SPARK-26211/inset. 
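To see the binary-type half of the problem outside Spark: JVM arrays use identity `equals`/`hashCode`, so a plain `HashSet` never finds an equal-but-distinct `Array[Byte]`, while a `TreeSet` with a content-based ordering does — which is why the patch routes `BinaryType` through the `TreeSet` path (and drops `null` from the set, since null membership is tracked separately). The sketch below is illustrative only; its byte-array ordering is an assumed stand-in, not Spark's `TypeUtils.getInterpretedOrdering`.

```scala
import scala.collection.immutable.{HashSet, TreeSet}

object BinaryInSetSketch {
  // Lexicographic, content-based ordering for byte arrays; a simplified stand-in for the
  // interpreted ordering used by the patch, sufficient to show the behavioural difference.
  private val byContent: Ordering[Array[Byte]] = new Ordering[Array[Byte]] {
    override def compare(x: Array[Byte], y: Array[Byte]): Int = {
      var i = 0
      while (i < x.length && i < y.length) {
        val c = java.lang.Byte.compare(x(i), y(i))
        if (c != 0) return c
        i += 1
      }
      x.length - y.length
    }
  }

  def main(args: Array[String]): Unit = {
    val hashSet = HashSet[Array[Byte]](Array[Byte](1, 2))
    // false: arrays hash and compare by reference, so an equal-but-distinct array is not found
    println(hashSet.contains(Array[Byte](1, 2)))

    val treeSet = TreeSet(Array[Byte](1, 2))(byContent)
    // true: the explicit ordering compares element contents
    println(treeSet.contains(Array[Byte](1, 2)))
  }
}
```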
Authored-by: Takuya UESHIN Signed-off-by: Wenchen Fan --- .../sql/catalyst/expressions/predicates.scala | 33 ++++++------ .../catalyst/expressions/PredicateSuite.scala | 50 ++++++++++++++++++- 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 16e0bc3aaf35..01ecb99025ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -367,31 +367,26 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with } @transient lazy val set: Set[Any] = child.dataType match { - case _: AtomicType => hset + case t: AtomicType if !t.isInstanceOf[BinaryType] => hset case _: NullType => hset case _ => // for structs use interpreted ordering to be able to compare UnsafeRows with non-UnsafeRows - TreeSet.empty(TypeUtils.getInterpretedOrdering(child.dataType)) ++ hset + TreeSet.empty(TypeUtils.getInterpretedOrdering(child.dataType)) ++ (hset - null) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val setTerm = ctx.addReferenceObj("set", set) - val childGen = child.genCode(ctx) - val setIsNull = if (hasNull) { - s"${ev.isNull} = !${ev.value};" - } else { - "" - } - ev.copy(code = - code""" - |${childGen.code} - |${CodeGenerator.JAVA_BOOLEAN} ${ev.isNull} = ${childGen.isNull}; - |${CodeGenerator.JAVA_BOOLEAN} ${ev.value} = false; - |if (!${ev.isNull}) { - | ${ev.value} = $setTerm.contains(${childGen.value}); - | $setIsNull - |} - """.stripMargin) + nullSafeCodeGen(ctx, ev, c => { + val setTerm = ctx.addReferenceObj("set", set) + val setIsNull = if (hasNull) { + s"${ev.isNull} = !${ev.value};" + } else { + "" + } + s""" + |${ev.value} = $setTerm.contains($c); + |$setIsNull + """.stripMargin + }) } override def sql: String = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index ac76b17ef476..3b60d1d88b3c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -268,7 +268,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(InSet(nl, nS), null) val primitiveTypes = Seq(IntegerType, FloatType, DoubleType, StringType, ByteType, ShortType, - LongType, BinaryType, BooleanType, DecimalType.USER_DEFAULT, TimestampType) + LongType, BooleanType, DecimalType.USER_DEFAULT, TimestampType) primitiveTypes.foreach { t => val dataGen = RandomDataGenerator.forType(t, nullable = true).get val inputData = Seq.fill(10) { @@ -293,6 +293,54 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { } } + test("INSET: binary") { + val hS = HashSet[Any]() + Array(1.toByte, 2.toByte) + Array(3.toByte) + val nS = HashSet[Any]() + Array(1.toByte, 2.toByte) + Array(3.toByte) + null + val onetwo = Literal(Array(1.toByte, 2.toByte)) + val three = Literal(Array(3.toByte)) + val threefour = Literal(Array(3.toByte, 4.toByte)) + val nl = Literal(null, onetwo.dataType) + checkEvaluation(InSet(onetwo, hS), true) + checkEvaluation(InSet(three, hS), true) + checkEvaluation(InSet(three, nS), true) + 
checkEvaluation(InSet(threefour, hS), false) + checkEvaluation(InSet(threefour, nS), null) + checkEvaluation(InSet(nl, hS), null) + checkEvaluation(InSet(nl, nS), null) + } + + test("INSET: struct") { + val hS = HashSet[Any]() + Literal.create((1, "a")).value + Literal.create((2, "b")).value + val nS = HashSet[Any]() + Literal.create((1, "a")).value + Literal.create((2, "b")).value + null + val oneA = Literal.create((1, "a")) + val twoB = Literal.create((2, "b")) + val twoC = Literal.create((2, "c")) + val nl = Literal(null, oneA.dataType) + checkEvaluation(InSet(oneA, hS), true) + checkEvaluation(InSet(twoB, hS), true) + checkEvaluation(InSet(twoB, nS), true) + checkEvaluation(InSet(twoC, hS), false) + checkEvaluation(InSet(twoC, nS), null) + checkEvaluation(InSet(nl, hS), null) + checkEvaluation(InSet(nl, nS), null) + } + + test("INSET: array") { + val hS = HashSet[Any]() + Literal.create(Seq(1, 2)).value + Literal.create(Seq(3)).value + val nS = HashSet[Any]() + Literal.create(Seq(1, 2)).value + Literal.create(Seq(3)).value + null + val onetwo = Literal.create(Seq(1, 2)) + val three = Literal.create(Seq(3)) + val threefour = Literal.create(Seq(3, 4)) + val nl = Literal(null, onetwo.dataType) + checkEvaluation(InSet(onetwo, hS), true) + checkEvaluation(InSet(three, hS), true) + checkEvaluation(InSet(three, nS), true) + checkEvaluation(InSet(threefour, hS), false) + checkEvaluation(InSet(threefour, nS), null) + checkEvaluation(InSet(nl, hS), null) + checkEvaluation(InSet(nl, nS), null) + } + private case class MyStruct(a: Long, b: String) private case class MyStruct2(a: MyStruct, b: Array[Int]) private val udt = new ExamplePointUDT From 06a87711b8a3a71c32897003cd9c6203e1c0c42e Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Thu, 29 Nov 2018 08:48:12 -0600 Subject: [PATCH 0015/1072] [SPARK-26024][FOLLOWUP][MINOR] Follow-up to remove extra blank lines in R function descriptions ## What changes were proposed in this pull request? Follow-up to remove extra blank lines in R function descriptions ## How was this patch tested? N/A Closes #23167 from srowen/SPARK-26024.2. Authored-by: Sean Owen Signed-off-by: Sean Owen --- R/pkg/R/DataFrame.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index ad9cd845f696..745bb3e15932 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -766,7 +766,6 @@ setMethod("repartition", #' \item{2.} {Return a new SparkDataFrame range partitioned by the given column(s), #' using \code{spark.sql.shuffle.partitions} as number of partitions.} #'} -#' #' At least one partition-by expression must be specified. #' When no explicit sort order is specified, "ascending nulls first" is assumed. #' @@ -828,7 +827,6 @@ setMethod("repartitionByRange", #' toJSON #' #' Converts a SparkDataFrame into a SparkDataFrame of JSON string. -#' #' Each row is turned into a JSON document with columns as different fields. #' The returned SparkDataFrame has a single character column with the name \code{value} #' From e3ea93ab6c8e434faa360af831947e682206d50d Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Thu, 29 Nov 2018 08:53:12 -0600 Subject: [PATCH 0016/1072] [MINOR][ML] add missing params to Instr ## What changes were proposed in this pull request? add following param to instr: GBTC: validationTol GBTR: validationTol, validationIndicatorCol colnames in LiR, LinearSVC, etc ## How was this patch tested? existing tests Closes #23122 from zhengruifeng/instr_append_missing_params. 
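For context on the instrumentation pattern being extended here: each estimator logs the parameters of a fit so the run can be reconstructed from the logs, and this change simply widens those `logParams` calls to also cover column names and validation settings. The sketch below is a hypothetical toy stand-in for that pattern, not Spark's `Instrumentation` class.

```scala
// Toy stand-in for the instrumentation pattern (hypothetical; not Spark's Instrumentation API).
class ToyInstrumentation {
  private val logged = scala.collection.mutable.LinkedHashMap.empty[String, Any]

  // Mirrors the spirit of instr.logParams(this, labelCol, featuresCol, ...): record each
  // named training parameter alongside its value.
  def logParams(params: (String, Any)*): Unit = params.foreach { case (k, v) => logged += k -> v }

  def dump(): Unit = logged.foreach { case (k, v) => println(s"training param $k = $v") }
}

object ToyInstrumentationDemo {
  def main(args: Array[String]): Unit = {
    val instr = new ToyInstrumentation
    // Before this change only the tree/boosting parameters were recorded; afterwards the
    // column names and validation settings (e.g. validationTol) show up in the log as well.
    instr.logParams(
      "labelCol" -> "label", "featuresCol" -> "features", "predictionCol" -> "prediction",
      "maxDepth" -> 5, "maxIter" -> 20, "validationTol" -> 0.01)
    instr.dump()
  }
}
```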
Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../spark/ml/classification/DecisionTreeClassifier.scala | 3 ++- .../org/apache/spark/ml/classification/GBTClassifier.scala | 2 +- .../scala/org/apache/spark/ml/classification/LinearSVC.scala | 4 ++-- .../apache/spark/ml/classification/LogisticRegression.scala | 5 +++-- .../ml/classification/MultilayerPerceptronClassifier.scala | 4 ++-- .../scala/org/apache/spark/ml/classification/OneVsRest.scala | 3 ++- .../scala/org/apache/spark/ml/regression/GBTRegressor.scala | 3 ++- 7 files changed, 14 insertions(+), 10 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index bcf89766b087..d9292a547676 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -115,7 +115,8 @@ class DecisionTreeClassifier @Since("1.4.0") ( val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset, numClasses) val strategy = getOldStrategy(categoricalFeatures, numClasses) - instr.logParams(this, maxDepth, maxBins, minInstancesPerNode, minInfoGain, maxMemoryInMB, + instr.logParams(this, labelCol, featuresCol, predictionCol, rawPredictionCol, + probabilityCol, maxDepth, maxBins, minInstancesPerNode, minInfoGain, maxMemoryInMB, cacheNodeIds, checkpointInterval, impurity, seed) val trees = RandomForest.run(oldDataset, strategy, numTrees = 1, featureSubsetStrategy = "all", diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 09a9df6d15ec..abe2d1febfdf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -194,7 +194,7 @@ class GBTClassifier @Since("1.4.0") ( instr.logParams(this, labelCol, featuresCol, predictionCol, impurity, lossType, maxDepth, maxBins, maxIter, maxMemoryInMB, minInfoGain, minInstancesPerNode, seed, stepSize, subsamplingRate, cacheNodeIds, checkpointInterval, featureSubsetStrategy, - validationIndicatorCol) + validationIndicatorCol, validationTol) instr.logNumClasses(numClasses) val (baseLearners, learnerWeights) = if (withValidation) { diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 1b5c02fc9a57..ff801abef9a9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -173,8 +173,8 @@ class LinearSVC @Since("2.2.0") ( instr.logPipelineStage(this) instr.logDataset(dataset) - instr.logParams(this, regParam, maxIter, fitIntercept, tol, standardization, threshold, - aggregationDepth) + instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, rawPredictionCol, + regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth) val (summarizer, labelSummarizer) = { val seqOp = (c: (MultivariateOnlineSummarizer, MultiClassSummarizer), diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 6f0804f0c8e4..27a7db0b2f5d 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -503,8 +503,9 @@ class LogisticRegression @Since("1.2.0") ( instr.logPipelineStage(this) instr.logDataset(dataset) - instr.logParams(this, regParam, elasticNetParam, standardization, threshold, - maxIter, tol, fitIntercept) + instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, rawPredictionCol, + probabilityCol, regParam, elasticNetParam, standardization, threshold, maxIter, tol, + fitIntercept) val (summarizer, labelSummarizer) = { val seqOp = (c: (MultivariateOnlineSummarizer, MultiClassSummarizer), diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index 4feddce1d9f2..47b8a8df637b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -205,8 +205,8 @@ class MultilayerPerceptronClassifier @Since("1.5.0") ( dataset: Dataset[_]): MultilayerPerceptronClassificationModel = instrumented { instr => instr.logPipelineStage(this) instr.logDataset(dataset) - instr.logParams(this, labelCol, featuresCol, predictionCol, layers, maxIter, tol, - blockSize, solver, stepSize, seed) + instr.logParams(this, labelCol, featuresCol, predictionCol, rawPredictionCol, layers, maxIter, + tol, blockSize, solver, stepSize, seed) val myLayers = $(layers) val labels = myLayers.last diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index 2f42a5922054..e1fceb1fc96a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -377,7 +377,8 @@ final class OneVsRest @Since("1.4.0") ( instr.logPipelineStage(this) instr.logDataset(dataset) - instr.logParams(this, labelCol, featuresCol, predictionCol, parallelism, rawPredictionCol) + instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, + rawPredictionCol, parallelism) instr.logNamedValue("classifier", $(classifier).getClass.getCanonicalName) // determine number of classes either from metadata if provided, or via computation. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 9b386ef5eed8..9a5b7d59e9ae 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -171,7 +171,8 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String) instr.logDataset(dataset) instr.logParams(this, labelCol, featuresCol, predictionCol, impurity, lossType, maxDepth, maxBins, maxIter, maxMemoryInMB, minInfoGain, minInstancesPerNode, - seed, stepSize, subsamplingRate, cacheNodeIds, checkpointInterval, featureSubsetStrategy) + seed, stepSize, subsamplingRate, cacheNodeIds, checkpointInterval, featureSubsetStrategy, + validationIndicatorCol, validationTol) val (baseLearners, learnerWeights) = if (withValidation) { GradientBoostedTrees.runWithValidation(trainDataset, validationDataset, boostingStrategy, From 9a09e91a3e880b7a07b11a957fb6766578f5a1af Mon Sep 17 00:00:00 2001 From: cody koeninger Date: Thu, 29 Nov 2018 08:54:31 -0600 Subject: [PATCH 0017/1072] [SPARK-26177] Automated formatting for Scala code ## What changes were proposed in this pull request? Add a maven plugin and wrapper script to use scalafmt to format files that differ from git master. Intention is for contributors to be able to use this to automate fixing code style, not to include it in build pipeline yet. If this PR is accepted, I'd make a different PR to update the code style section of https://spark.apache.org/contributing.html to mention the script ## How was this patch tested? Manually tested by modifying a few files and running ./dev/scalafmt then checking that ./dev/scalastyle still passed. Closes #23148 from koeninger/scalafmt. Authored-by: cody koeninger Signed-off-by: Sean Owen --- dev/.scalafmt.conf | 24 ++++++++++++++++++++++++ dev/scalafmt | 23 +++++++++++++++++++++++ pom.xml | 21 +++++++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 dev/.scalafmt.conf create mode 100755 dev/scalafmt diff --git a/dev/.scalafmt.conf b/dev/.scalafmt.conf new file mode 100644 index 000000000000..def67e026982 --- /dev/null +++ b/dev/.scalafmt.conf @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +align = none +align.openParenDefnSite = false +align.openParenCallSite = false +align.tokens = [] +docstrings = JavaDoc +maxColumn = 98 + diff --git a/dev/scalafmt b/dev/scalafmt new file mode 100755 index 000000000000..76f688a2f5b8 --- /dev/null +++ b/dev/scalafmt @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# by default, format only files that differ from git master +params="${@:---diff}" + +./build/mvn mvn-scalafmt_2.12:format -Dscalafmt.skip=false -Dscalafmt.parameters="$params" diff --git a/pom.xml b/pom.xml index 93075e9b06a6..3ca2f739ce0e 100644 --- a/pom.xml +++ b/pom.xml @@ -156,6 +156,9 @@ 3.2.2 2.12.7 2.12 + --diff --test + + true 1.9.13 2.9.6 1.1.7.1 @@ -2600,6 +2603,24 @@ + + org.antipathy + mvn-scalafmt_2.12 + 0.9_1.5.1 + + ${scalafmt.parameters} + ${scalafmt.skip} + dev/.scalafmt.conf + + + + validate + + format + + + + From 31c4fab3fb0343edf971de9070a319c6b3094647 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 29 Nov 2018 10:31:31 -0600 Subject: [PATCH 0018/1072] [SPARK-26081][SQL] Prevent empty files for empty partitions in Text datasources ## What changes were proposed in this pull request? In the PR, I propose to postpone creation of `OutputStream`/`Univocity`/`JacksonGenerator` till the first row should be written. This prevents creation of empty files for empty partitions. So, no need to open and to read such files back while loading data from the location. ## How was this patch tested? Added tests for Text, JSON and CSV datasource where empty dataset is written but should not produce any files. Closes #23052 from MaxGekk/text-empty-files. 
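The fix is the same small pattern in all three writers: hold the underlying generator/stream in an `Option`, create it lazily on the first `write`, and make `close()` a no-op when nothing was ever written. A minimal standalone sketch of that pattern, using plain `java.io` rather than Spark's `OutputWriter`/`CodecStreams` API:

```scala
import java.io.{BufferedWriter, File, FileWriter}

// Minimal sketch of a lazily-created writer: the output file only comes into existence
// when the first record arrives, so an empty partition leaves no file behind.
class LazyTextWriter(path: String) {
  private var writer: Option[BufferedWriter] = None

  def write(line: String): Unit = {
    val w = writer.getOrElse {
      val newWriter = new BufferedWriter(new FileWriter(new File(path)))
      writer = Some(newWriter)
      newWriter
    }
    w.write(line)
    w.newLine()
  }

  // If write() was never called, there is nothing to close and no file was produced.
  def close(): Unit = writer.foreach(_.close())
}
```

The adjusted expectations in `JsonSuite` below follow from the same behavior change: with no empty file emitted per empty partition, the reader no longer sees the extra rows it previously produced for those files.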
Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Sean Owen --- .../datasources/csv/CSVFileFormat.scala | 20 ++++++++++++------- .../datasources/json/JsonFileFormat.scala | 19 +++++++++--------- .../datasources/text/TextFileFormat.scala | 16 +++++++++++---- .../execution/datasources/csv/CSVSuite.scala | 9 +++++++++ .../datasources/json/JsonSuite.scala | 13 ++++++++++-- .../datasources/text/TextSuite.scala | 9 +++++++++ 6 files changed, 64 insertions(+), 22 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala index ff1911d69a6b..4c5a1d327023 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala @@ -169,13 +169,19 @@ private[csv] class CsvOutputWriter( context: TaskAttemptContext, params: CSVOptions) extends OutputWriter with Logging { - private val charset = Charset.forName(params.charset) - - private val writer = CodecStreams.createOutputStreamWriter(context, new Path(path), charset) - - private val gen = new UnivocityGenerator(dataSchema, writer, params) + private var univocityGenerator: Option[UnivocityGenerator] = None + + override def write(row: InternalRow): Unit = { + val gen = univocityGenerator.getOrElse { + val charset = Charset.forName(params.charset) + val os = CodecStreams.createOutputStreamWriter(context, new Path(path), charset) + val newGen = new UnivocityGenerator(dataSchema, os, params) + univocityGenerator = Some(newGen) + newGen + } - override def write(row: InternalRow): Unit = gen.write(row) + gen.write(row) + } - override def close(): Unit = gen.close() + override def close(): Unit = univocityGenerator.map(_.close()) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala index 610f0d1619fc..3042133ee43a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala @@ -175,19 +175,20 @@ private[json] class JsonOutputWriter( " which can be read back by Spark only if multiLine is enabled.") } - private val writer = CodecStreams.createOutputStreamWriter( - context, new Path(path), encoding) - - // create the Generator without separator inserted between 2 records - private[this] val gen = new JacksonGenerator(dataSchema, writer, options) + private var jacksonGenerator: Option[JacksonGenerator] = None override def write(row: InternalRow): Unit = { + val gen = jacksonGenerator.getOrElse { + val os = CodecStreams.createOutputStreamWriter(context, new Path(path), encoding) + // create the Generator without separator inserted between 2 records + val newGen = new JacksonGenerator(dataSchema, os, options) + jacksonGenerator = Some(newGen) + newGen + } + gen.write(row) gen.writeLineEnding() } - override def close(): Unit = { - gen.close() - writer.close() - } + override def close(): Unit = jacksonGenerator.map(_.close()) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala index 268297148b52..01948ab25d63 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.datasources.text +import java.io.OutputStream + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} @@ -148,17 +150,23 @@ class TextOutputWriter( context: TaskAttemptContext) extends OutputWriter { - private val writer = CodecStreams.createOutputStream(context, new Path(path)) + private var outputStream: Option[OutputStream] = None override def write(row: InternalRow): Unit = { + val os = outputStream.getOrElse{ + val newStream = CodecStreams.createOutputStream(context, new Path(path)) + outputStream = Some(newStream) + newStream + } + if (!row.isNullAt(0)) { val utf8string = row.getUTF8String(0) - utf8string.writeTo(writer) + utf8string.writeTo(os) } - writer.write(lineSeparator) + os.write(lineSeparator) } override def close(): Unit = { - writer.close() + outputStream.map(_.close()) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index c275d63d32cc..e14e8d49db5c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -1986,4 +1986,13 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te }.getMessage assert(errMsg2.contains("'lineSep' can contain only 1 character")) } + + test("do not produce empty files for empty partitions") { + withTempPath { dir => + val path = dir.getCanonicalPath + spark.emptyDataset[String].write.csv(path) + val files = new File(path).listFiles() + assert(!files.exists(_.getName.endsWith("csv"))) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index ee31077e12ef..ee5176e23e34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1897,7 +1897,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { .text(path) val jsonDF = spark.read.option("multiLine", true).option("mode", "PERMISSIVE").json(path) - assert(jsonDF.count() === corruptRecordCount + 1) // null row for empty file + assert(jsonDF.count() === corruptRecordCount) assert(jsonDF.schema === new StructType() .add("_corrupt_record", StringType) .add("dummy", StringType)) @@ -1910,7 +1910,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { F.count($"dummy").as("valid"), F.count($"_corrupt_record").as("corrupt"), F.count("*").as("count")) - checkAnswer(counts, Row(1, 5, 7)) // null row for empty file + checkAnswer(counts, Row(1, 4, 6)) // null row for empty file } } @@ -2555,4 +2555,13 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { emptyString(StringType, "") emptyString(BinaryType, "".getBytes(StandardCharsets.UTF_8)) } + + test("do not produce empty files for empty partitions") { + withTempPath { dir => + val path = dir.getCanonicalPath + spark.emptyDataset[String].write.json(path) + 
val files = new File(path).listFiles() + assert(!files.exists(_.getName.endsWith("json"))) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala index 0e7f3afa9c3a..a86d5ee37f3d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala @@ -233,4 +233,13 @@ class TextSuite extends QueryTest with SharedSQLContext { assert(data(3) == Row("\"doh\"")) assert(data.length == 4) } + + test("do not produce empty files for empty partitions") { + withTempPath { dir => + val path = dir.getCanonicalPath + spark.emptyDataset[String].write.text(path) + val files = new File(path).listFiles() + assert(!files.exists(_.getName.endsWith("txt"))) + } + } } From de4228152771390b0c5ba15254e9c5b832095366 Mon Sep 17 00:00:00 2001 From: Keiji Yoshida Date: Thu, 29 Nov 2018 10:39:00 -0600 Subject: [PATCH 0019/1072] [MINOR][DOCS][WIP] Fix Typos ## What changes were proposed in this pull request? Fix Typos. ## How was this patch tested? NA Closes #23145 from kjmrknsn/docUpdate. Authored-by: Keiji Yoshida Signed-off-by: Sean Owen --- docs/index.md | 4 +-- docs/rdd-programming-guide.md | 8 +++--- docs/running-on-mesos.md | 2 +- docs/sql-data-sources-avro.md | 6 ++-- docs/sql-data-sources-hive-tables.md | 2 +- docs/sql-data-sources-jdbc.md | 2 +- docs/sql-data-sources-load-save-functions.md | 2 +- docs/sql-getting-started.md | 2 +- docs/sql-programming-guide.md | 2 +- docs/sql-pyspark-pandas-with-arrow.md | 2 +- docs/sql-reference.md | 6 ++-- docs/streaming-programming-guide.md | 2 +- .../structured-streaming-programming-guide.md | 28 +++++++++---------- 13 files changed, 34 insertions(+), 34 deletions(-) diff --git a/docs/index.md b/docs/index.md index bd287e3f8d83..8864239eb164 100644 --- a/docs/index.md +++ b/docs/index.md @@ -66,8 +66,8 @@ Example applications are also provided in Python. For example, ./bin/spark-submit examples/src/main/python/pi.py 10 -Spark also provides an experimental [R API](sparkr.html) since 1.4 (only DataFrames APIs included). -To run Spark interactively in a R interpreter, use `bin/sparkR`: +Spark also provides an [R API](sparkr.html) since 1.4 (only DataFrames APIs included). +To run Spark interactively in an R interpreter, use `bin/sparkR`: ./bin/sparkR --master local[2] diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index 9a07d6ca24b6..2d1ddae5780d 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -332,7 +332,7 @@ One important parameter for parallel collections is the number of *partitions* t Spark can create distributed datasets from any storage source supported by Hadoop, including your local file system, HDFS, Cassandra, HBase, [Amazon S3](http://wiki.apache.org/hadoop/AmazonS3), etc. Spark supports text files, [SequenceFiles](http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/mapred/SequenceFileInputFormat.html), and any other Hadoop [InputFormat](http://hadoop.apache.org/docs/stable/api/org/apache/hadoop/mapred/InputFormat.html). -Text file RDDs can be created using `SparkContext`'s `textFile` method. This method takes an URI for the file (either a local path on the machine, or a `hdfs://`, `s3a://`, etc URI) and reads it as a collection of lines. 
Here is an example invocation: +Text file RDDs can be created using `SparkContext`'s `textFile` method. This method takes a URI for the file (either a local path on the machine, or a `hdfs://`, `s3a://`, etc URI) and reads it as a collection of lines. Here is an example invocation: {% highlight scala %} scala> val distFile = sc.textFile("data.txt") @@ -365,7 +365,7 @@ Apart from text files, Spark's Scala API also supports several other data format Spark can create distributed datasets from any storage source supported by Hadoop, including your local file system, HDFS, Cassandra, HBase, [Amazon S3](http://wiki.apache.org/hadoop/AmazonS3), etc. Spark supports text files, [SequenceFiles](http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/mapred/SequenceFileInputFormat.html), and any other Hadoop [InputFormat](http://hadoop.apache.org/docs/stable/api/org/apache/hadoop/mapred/InputFormat.html). -Text file RDDs can be created using `SparkContext`'s `textFile` method. This method takes an URI for the file (either a local path on the machine, or a `hdfs://`, `s3a://`, etc URI) and reads it as a collection of lines. Here is an example invocation: +Text file RDDs can be created using `SparkContext`'s `textFile` method. This method takes a URI for the file (either a local path on the machine, or a `hdfs://`, `s3a://`, etc URI) and reads it as a collection of lines. Here is an example invocation: {% highlight java %} JavaRDD distFile = sc.textFile("data.txt"); @@ -397,7 +397,7 @@ Apart from text files, Spark's Java API also supports several other data formats PySpark can create distributed datasets from any storage source supported by Hadoop, including your local file system, HDFS, Cassandra, HBase, [Amazon S3](http://wiki.apache.org/hadoop/AmazonS3), etc. Spark supports text files, [SequenceFiles](http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/mapred/SequenceFileInputFormat.html), and any other Hadoop [InputFormat](http://hadoop.apache.org/docs/stable/api/org/apache/hadoop/mapred/InputFormat.html). -Text file RDDs can be created using `SparkContext`'s `textFile` method. This method takes an URI for the file (either a local path on the machine, or a `hdfs://`, `s3a://`, etc URI) and reads it as a collection of lines. Here is an example invocation: +Text file RDDs can be created using `SparkContext`'s `textFile` method. This method takes a URI for the file (either a local path on the machine, or a `hdfs://`, `s3a://`, etc URI) and reads it as a collection of lines. Here is an example invocation: {% highlight python %} >>> distFile = sc.textFile("data.txt") @@ -1122,7 +1122,7 @@ costly operation. #### Background -To understand what happens during the shuffle we can consider the example of the +To understand what happens during the shuffle, we can consider the example of the [`reduceByKey`](#ReduceByLink) operation. The `reduceByKey` operation generates a new RDD where all values for a single key are combined into a tuple - the key and the result of executing a reduce function against all values associated with that key. The challenge is that not all values for a diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 2502cd4ca86f..b3ba4b255b71 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -687,7 +687,7 @@ See the [configuration page](configuration.html) for information on Spark config 0 Set the maximum number GPU resources to acquire for this job. 
Note that executors will still launch when no GPU resources are found - since this configuration is just a upper limit and not a guaranteed amount. + since this configuration is just an upper limit and not a guaranteed amount. diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index bfe641d1c6d1..b403a66fad79 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -66,9 +66,9 @@ write.df(select(df, "name", "favorite_color"), "namesAndFavColors.avro", "avro") ## to_avro() and from_avro() The Avro package provides function `to_avro` to encode a column as binary in Avro format, and `from_avro()` to decode Avro binary data into a column. Both functions transform one column to -another column, and the input/output SQL data type can be complex type or primitive type. +another column, and the input/output SQL data type can be a complex type or a primitive type. -Using Avro record as columns are useful when reading from or writing to a streaming source like Kafka. Each +Using Avro record as columns is useful when reading from or writing to a streaming source like Kafka. Each Kafka key-value record will be augmented with some metadata, such as the ingestion timestamp into Kafka, the offset in Kafka, etc. * If the "value" field that contains your data is in Avro, you could use `from_avro()` to extract your data, enrich it, clean it, and then push it downstream to Kafka again or write it out to a file. * `to_avro()` can be used to turn structs into Avro records. This method is particularly useful when you would like to re-encode multiple columns into a single one when writing data out to Kafka. @@ -151,7 +151,7 @@ Data source options of Avro can be set via: avroSchema None - Optional Avro schema provided by an user in JSON format. The date type and naming of record fields + Optional Avro schema provided by a user in JSON format. The date type and naming of record fields should match the input Avro data or Catalyst data, otherwise the read/write action will fail. read and write diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index 28e1a3962666..3b39a32d4324 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -74,7 +74,7 @@ creating table, you can create a table using storage handler at Hive side, and u inputFormat, outputFormat These 2 options specify the name of a corresponding `InputFormat` and `OutputFormat` class as a string literal, - e.g. `org.apache.hadoop.hive.ql.io.orc.OrcInputFormat`. These 2 options must be appeared in pair, and you can not + e.g. `org.apache.hadoop.hive.ql.io.orc.OrcInputFormat`. These 2 options must be appeared in a pair, and you can not specify them if you already specified the `fileFormat` option. diff --git a/docs/sql-data-sources-jdbc.md b/docs/sql-data-sources-jdbc.md index 057e8217241a..9a5d0fc7d424 100644 --- a/docs/sql-data-sources-jdbc.md +++ b/docs/sql-data-sources-jdbc.md @@ -55,7 +55,7 @@ the following case-insensitive options: as a subquery in the FROM clause. Spark will also assign an alias to the subquery clause. As an example, spark will issue a query of the following form to the JDBC Source.

SELECT <columns> FROM (<user_specified_query>) spark_gen_alias

- Below are couple of restrictions while using this option.
+ Below are a couple of restrictions while using this option.
  1. It is not allowed to specify `dbtable` and `query` options at the same time.
  2. It is not allowed to specify `query` and `partitionColumn` options at the same time. When specifying diff --git a/docs/sql-data-sources-load-save-functions.md b/docs/sql-data-sources-load-save-functions.md index e4c7b1766f91..4386caedb38b 100644 --- a/docs/sql-data-sources-load-save-functions.md +++ b/docs/sql-data-sources-load-save-functions.md @@ -324,4 +324,4 @@ CLUSTERED BY(name) SORTED BY (favorite_numbers) INTO 42 BUCKETS; `partitionBy` creates a directory structure as described in the [Partition Discovery](sql-data-sources-parquet.html#partition-discovery) section. Thus, it has limited applicability to columns with high cardinality. In contrast `bucketBy` distributes -data across a fixed number of buckets and can be used when a number of unique values is unbounded. +data across a fixed number of buckets and can be used when the number of unique values is unbounded. diff --git a/docs/sql-getting-started.md b/docs/sql-getting-started.md index 88512205894a..0c3f0fb20610 100644 --- a/docs/sql-getting-started.md +++ b/docs/sql-getting-started.md @@ -99,7 +99,7 @@ Here we include some basic examples of structured data processing using Datasets
    {% include_example untyped_ops scala/org/apache/spark/examples/sql/SparkSQLExample.scala %} -For a complete list of the types of operations that can be performed on a Dataset refer to the [API Documentation](api/scala/index.html#org.apache.spark.sql.Dataset). +For a complete list of the types of operations that can be performed on a Dataset, refer to the [API Documentation](api/scala/index.html#org.apache.spark.sql.Dataset). In addition to simple column references and expressions, Datasets also have a rich library of functions including string manipulation, date arithmetic, common math operations and more. The complete list is available in the [DataFrame Function Reference](api/scala/index.html#org.apache.spark.sql.functions$).
    diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index eca8915dfa97..9c85a15827bb 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -7,7 +7,7 @@ title: Spark SQL and DataFrames Spark SQL is a Spark module for structured data processing. Unlike the basic Spark RDD API, the interfaces provided by Spark SQL provide Spark with more information about the structure of both the data and the computation being performed. Internally, Spark SQL uses this extra information to perform extra optimizations. There are several ways to -interact with Spark SQL including SQL and the Dataset API. When computing a result +interact with Spark SQL including SQL and the Dataset API. When computing a result, the same execution engine is used, independent of which API/language you are using to express the computation. This unification means that developers can easily switch back and forth between different APIs based on which provides the most natural way to express a given transformation. diff --git a/docs/sql-pyspark-pandas-with-arrow.md b/docs/sql-pyspark-pandas-with-arrow.md index d04b955f9bf8..d18ca0beb0fc 100644 --- a/docs/sql-pyspark-pandas-with-arrow.md +++ b/docs/sql-pyspark-pandas-with-arrow.md @@ -129,7 +129,7 @@ For detailed usage, please see [`pyspark.sql.functions.pandas_udf`](api/python/p Currently, all Spark SQL data types are supported by Arrow-based conversion except `MapType`, `ArrayType` of `TimestampType`, and nested `StructType`. `BinaryType` is supported only when -installed PyArrow is equal to or higher then 0.10.0. +installed PyArrow is equal to or higher than 0.10.0. ### Setting Arrow Batch Size diff --git a/docs/sql-reference.md b/docs/sql-reference.md index 9e4239b6bad2..88d0596f3876 100644 --- a/docs/sql-reference.md +++ b/docs/sql-reference.md @@ -38,15 +38,15 @@ Spark SQL and DataFrames support the following data types: elements with the type of `elementType`. `containsNull` is used to indicate if elements in a `ArrayType` value can have `null` values. - `MapType(keyType, valueType, valueContainsNull)`: - Represents values comprising a set of key-value pairs. The data type of keys are - described by `keyType` and the data type of values are described by `valueType`. + Represents values comprising a set of key-value pairs. The data type of keys is + described by `keyType` and the data type of values is described by `valueType`. For a `MapType` value, keys are not allowed to have `null` values. `valueContainsNull` is used to indicate if values of a `MapType` value can have `null` values. - `StructType(fields)`: Represents values with the structure described by a sequence of `StructField`s (`fields`). * `StructField(name, dataType, nullable)`: Represents a field in a `StructType`. The name of a field is indicated by `name`. The data type of a field is indicated - by `dataType`. `nullable` is used to indicate if values of this fields can have + by `dataType`. `nullable` is used to indicate if values of these fields can have `null` values.
    diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index 70bee5032a24..94c61205bd53 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -733,7 +733,7 @@ for Java, and [StreamingContext](api/python/pyspark.streaming.html#pyspark.strea Python API As of Spark {{site.SPARK_VERSION_SHORT}}, out of these sources, Kafka and Kinesis are available in the Python API. -This category of sources require interfacing with external non-Spark libraries, some of them with +This category of sources requires interfacing with external non-Spark libraries, some of them with complex dependencies (e.g., Kafka). Hence, to minimize issues related to version conflicts of dependencies, the functionality to create DStreams from these sources has been moved to separate libraries that can be [linked](#linking) to explicitly when necessary. diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 8cea98c2cc52..32d61dcdb459 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1493,7 +1493,7 @@ Additional details on supported joins: ### Streaming Deduplication You can deduplicate records in data streams using a unique identifier in the events. This is exactly same as deduplication on static using a unique identifier column. The query will store the necessary amount of data from previous records such that it can filter duplicate records. Similar to aggregations, you can use deduplication with or without watermarking. -- *With watermark* - If there is a upper bound on how late a duplicate record may arrive, then you can define a watermark on a event time column and deduplicate using both the guid and the event time columns. The query will use the watermark to remove old state data from past records that are not expected to get any duplicates any more. This bounds the amount of the state the query has to maintain. +- *With watermark* - If there is an upper bound on how late a duplicate record may arrive, then you can define a watermark on an event time column and deduplicate using both the guid and the event time columns. The query will use the watermark to remove old state data from past records that are not expected to get any duplicates any more. This bounds the amount of the state the query has to maintain. - *Without watermark* - Since there are no bounds on when a duplicate record may arrive, the query stores the data from all the past records as state. @@ -1577,7 +1577,7 @@ event time seen in each input stream, calculates watermarks based on the corresp and chooses a single global watermark with them to be used for stateful operations. By default, the minimum is chosen as the global watermark because it ensures that no data is accidentally dropped as too late if one of the streams falls behind the others -(for example, one of the streams stop receiving data due to upstream failures). In other words, +(for example, one of the streams stops receiving data due to upstream failures). In other words, the global watermark will safely move at the pace of the slowest stream and the query output will be delayed accordingly. @@ -1598,7 +1598,7 @@ Some of them are as follows. - Multiple streaming aggregations (i.e. a chain of aggregations on a streaming DF) are not yet supported on streaming Datasets. -- Limit and take first N rows are not supported on streaming Datasets. 
+- Limit and take the first N rows are not supported on streaming Datasets. - Distinct operations on streaming Datasets are not supported. @@ -1634,7 +1634,7 @@ returned through `Dataset.writeStream()`. You will have to specify one or more o - *Query name:* Optionally, specify a unique name of the query for identification. -- *Trigger interval:* Optionally, specify the trigger interval. If it is not specified, the system will check for availability of new data as soon as the previous processing has completed. If a trigger time is missed because the previous processing has not completed, then the system will trigger processing immediately. +- *Trigger interval:* Optionally, specify the trigger interval. If it is not specified, the system will check for availability of new data as soon as the previous processing has been completed. If a trigger time is missed because the previous processing has not been completed, then the system will trigger processing immediately. - *Checkpoint location:* For some output sinks where the end-to-end fault-tolerance can be guaranteed, specify the location where the system will write all the checkpoint information. This should be a directory in an HDFS-compatible fault-tolerant file system. The semantics of checkpointing is discussed in more detail in the next section. @@ -2106,7 +2106,7 @@ With `foreachBatch`, you can do the following. ###### Foreach If `foreachBatch` is not an option (for example, corresponding batch data writer does not exist, or -continuous processing mode), then you can express you custom writer logic using `foreach`. +continuous processing mode), then you can express your custom writer logic using `foreach`. Specifically, you can express the data writing logic by dividing it into three methods: `open`, `process`, and `close`. Since Spark 2.4, `foreach` is available in Scala, Java and Python. @@ -2236,8 +2236,8 @@ When the streaming query is started, Spark calls the function or the object’s in the continuous mode, then this guarantee does not hold and therefore should not be used for deduplication. #### Triggers -The trigger settings of a streaming query defines the timing of streaming data processing, whether -the query is going to executed as micro-batch query with a fixed batch interval or as a continuous processing query. +The trigger settings of a streaming query define the timing of streaming data processing, whether +the query is going to be executed as micro-batch query with a fixed batch interval or as a continuous processing query. Here are the different kinds of triggers that are supported. @@ -2960,7 +2960,7 @@ the effect of the change is not well-defined. For all of them: - Addition/deletion/modification of rate limits is allowed: `spark.readStream.format("kafka").option("subscribe", "topic")` to `spark.readStream.format("kafka").option("subscribe", "topic").option("maxOffsetsPerTrigger", ...)` - - Changes to subscribed topics/files is generally not allowed as the results are unpredictable: `spark.readStream.format("kafka").option("subscribe", "topic")` to `spark.readStream.format("kafka").option("subscribe", "newTopic")` + - Changes to subscribed topics/files are generally not allowed as the results are unpredictable: `spark.readStream.format("kafka").option("subscribe", "topic")` to `spark.readStream.format("kafka").option("subscribe", "newTopic")` - *Changes in the type of output sink*: Changes between a few specific combinations of sinks are allowed. This needs to be verified on a case-by-case basis. 
Here are a few examples. @@ -2974,17 +2974,17 @@ the effect of the change is not well-defined. For all of them: - *Changes in the parameters of output sink*: Whether this is allowed and whether the semantics of the change are well-defined depends on the sink and the query. Here are a few examples. - - Changes to output directory of a file sink is not allowed: `sdf.writeStream.format("parquet").option("path", "/somePath")` to `sdf.writeStream.format("parquet").option("path", "/anotherPath")` + - Changes to output directory of a file sink are not allowed: `sdf.writeStream.format("parquet").option("path", "/somePath")` to `sdf.writeStream.format("parquet").option("path", "/anotherPath")` - - Changes to output topic is allowed: `sdf.writeStream.format("kafka").option("topic", "someTopic")` to `sdf.writeStream.format("kafka").option("topic", "anotherTopic")` + - Changes to output topic are allowed: `sdf.writeStream.format("kafka").option("topic", "someTopic")` to `sdf.writeStream.format("kafka").option("topic", "anotherTopic")` - - Changes to the user-defined foreach sink (that is, the `ForeachWriter` code) is allowed, but the semantics of the change depends on the code. + - Changes to the user-defined foreach sink (that is, the `ForeachWriter` code) are allowed, but the semantics of the change depends on the code. - *Changes in projection / filter / map-like operations**: Some cases are allowed. For example: - Addition / deletion of filters is allowed: `sdf.selectExpr("a")` to `sdf.where(...).selectExpr("a").filter(...)`. - - Changes in projections with same output schema is allowed: `sdf.selectExpr("stringColumn AS json").writeStream` to `sdf.selectExpr("anotherStringColumn AS json").writeStream` + - Changes in projections with same output schema are allowed: `sdf.selectExpr("stringColumn AS json").writeStream` to `sdf.selectExpr("anotherStringColumn AS json").writeStream` - Changes in projections with different output schema are conditionally allowed: `sdf.selectExpr("a").writeStream` to `sdf.selectExpr("b").writeStream` is allowed only if the output sink allows the schema change from `"a"` to `"b"`. @@ -3000,7 +3000,7 @@ the effect of the change is not well-defined. For all of them: - *Streaming deduplication*: For example, `sdf.dropDuplicates("a")`. Any change in number or type of grouping keys or aggregates is not allowed. - *Stream-stream join*: For example, `sdf1.join(sdf2, ...)` (i.e. both inputs are generated with `sparkSession.readStream`). Changes - in the schema or equi-joining columns are not allowed. Changes in join type (outer or inner) not allowed. Other changes in the join condition are ill-defined. + in the schema or equi-joining columns are not allowed. Changes in join type (outer or inner) are not allowed. Other changes in the join condition are ill-defined. - *Arbitrary stateful operation*: For example, `sdf.groupByKey(...).mapGroupsWithState(...)` or `sdf.groupByKey(...).flatMapGroupsWithState(...)`. Any change to the schema of the user-defined state and the type of timeout is not allowed. @@ -3083,7 +3083,7 @@ spark \ -A checkpoint interval of 1 second means that the continuous processing engine will records the progress of the query every second. The resulting checkpoints are in a format compatible with the micro-batch engine, hence any query can be restarted with any trigger. For example, a supported query started with the micro-batch mode can be restarted in continuous mode, and vice versa. 
Note that any time you switch to continuous mode, you will get at-least-once fault-tolerance guarantees. +A checkpoint interval of 1 second means that the continuous processing engine will record the progress of the query every second. The resulting checkpoints are in a format compatible with the micro-batch engine, hence any query can be restarted with any trigger. For example, a supported query started with the micro-batch mode can be restarted in continuous mode, and vice versa. Note that any time you switch to continuous mode, you will get at-least-once fault-tolerance guarantees. ## Supported Queries {:.no_toc} From 24e78b7f163acf6129d934633ae6d3e6d568656a Mon Sep 17 00:00:00 2001 From: Shahid Date: Thu, 29 Nov 2018 09:48:18 -0800 Subject: [PATCH 0020/1072] [SPARK-26186][SPARK-26184][CORE] Last updated time is not getting updated for the Inprogress application ## What changes were proposed in this pull request? When 'spark.history.fs.inProgressOptimization.enabled' is true, an in-progress application's last updated time is not updated in the History UI. Also, during cleaning, an in-progress application is removed from the listing even if its last updated time is within the cleaning threshold. In this PR, if fast in-progress optimization is enabled, we update the `lastUpdateTime` of the application to the last scan time. This updates the `lastUpdateTime` shown in the History UI, and during cleaning the application is not removed if its `lastUpdateTime` is within the cleaning interval. ## How was this patch tested? Added a unit test and attached screenshots. Before patch: ![screenshot from 2018-11-27 23-22-38](https://user-images.githubusercontent.com/23054875/49101600-9b5a3380-f29c-11e8-8efc-3fb594e4279a.png) ![screenshot from 2018-11-27 23-20-11](https://user-images.githubusercontent.com/23054875/49101601-9c8b6080-f29c-11e8-928e-643a8c8f4477.png) After Patch: ![screenshot from 2018-11-27 23-37-10](https://user-images.githubusercontent.com/23054875/49101911-669aac00-f29d-11e8-8181-663e4a08ab0e.png) ![screenshot from 2018-11-27 23-39-04](https://user-images.githubusercontent.com/23054875/49102010-a5306680-f29d-11e8-947a-e8a2a09a785a.png) Closes #23158 from shahidki31/HistoryLastUpdateTime. Authored-by: Shahid Signed-off-by: Marcelo Vanzin --- .../deploy/history/FsHistoryProvider.scala | 22 +++++++++++ .../history/FsHistoryProviderSuite.scala | 39 +++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 4f4a00b7d831..da6e5f03aabb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -461,6 +461,28 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) if (info.appId.isDefined && fastInProgressParsing) { // When fast in-progress parsing is on, we don't need to re-parse when the // size changes, but we do need to invalidate any existing UIs. + // Also, we need to update the `lastUpdated time` to display the updated time in + // the HistoryUI and to avoid cleaning the inprogress app while running.
+ val appInfo = listing.read(classOf[ApplicationInfoWrapper], info.appId.get) + + val attemptList = appInfo.attempts.map { attempt => + if (attempt.info.attemptId == info.attemptId) { + new AttemptInfoWrapper( + attempt.info.copy(lastUpdated = new Date(newLastScanTime)), + attempt.logPath, + attempt.fileSize, + attempt.adminAcls, + attempt.viewAcls, + attempt.adminAclsGroups, + attempt.viewAclsGroups) + } else { + attempt + } + } + + val updatedAppInfo = new ApplicationInfoWrapper(appInfo.info, attemptList) + listing.write(updatedAppInfo) + invalidateUI(info.appId.get, info.attemptId) false } else { diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index b0ced46c9c7b..527c654a7cd6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -334,6 +334,45 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc assert(!log2.exists()) } + test("should not clean inprogress application with lastUpdated time less than maxTime") { + val firstFileModifiedTime = TimeUnit.DAYS.toMillis(1) + val secondFileModifiedTime = TimeUnit.DAYS.toMillis(6) + val maxAge = TimeUnit.DAYS.toMillis(7) + val clock = new ManualClock(0) + val provider = new FsHistoryProvider( + createTestConf().set(MAX_LOG_AGE_S, maxAge / 1000), clock) + val log = newLogFile("inProgressApp1", None, inProgress = true) + writeFile(log, true, None, + SparkListenerApplicationStart( + "inProgressApp1", Some("inProgressApp1"), 3L, "test", Some("attempt1")) + ) + clock.setTime(firstFileModifiedTime) + log.setLastModified(clock.getTimeMillis()) + provider.checkForLogs() + writeFile(log, true, None, + SparkListenerApplicationStart( + "inProgressApp1", Some("inProgressApp1"), 3L, "test", Some("attempt1")), + SparkListenerJobStart(0, 1L, Nil, null) + ) + + clock.setTime(secondFileModifiedTime) + log.setLastModified(clock.getTimeMillis()) + provider.checkForLogs() + clock.setTime(TimeUnit.DAYS.toMillis(10)) + writeFile(log, true, None, + SparkListenerApplicationStart( + "inProgressApp1", Some("inProgressApp1"), 3L, "test", Some("attempt1")), + SparkListenerJobStart(0, 1L, Nil, null), + SparkListenerJobEnd(0, 1L, JobSucceeded) + ) + log.setLastModified(clock.getTimeMillis()) + provider.checkForLogs() + // This should not trigger any cleanup + updateAndCheck(provider) { list => + list.size should be(1) + } + } + test("log cleaner for inProgress files") { val firstFileModifiedTime = TimeUnit.SECONDS.toMillis(10) val secondFileModifiedTime = TimeUnit.SECONDS.toMillis(20) From 1144df3b5dc8280ae4a07678cb439f0b44cb17b0 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Thu, 29 Nov 2018 09:59:38 -0800 Subject: [PATCH 0021/1072] [SPARK-26015][K8S] Set a default UID for Spark on K8S Images Adds USER directives to the Dockerfiles which is configurable via build argument (`spark_uid`) for easy customisation. A `-u` flag is added to `bin/docker-image-tool.sh` to make it easy to customise this e.g. 
``` > bin/docker-image-tool.sh -r rvesse -t uid -u 185 build > bin/docker-image-tool.sh -r rvesse -t uid push ``` If no UID is explicitly specified it defaults to `185` - this is per skonto's suggestion to align with the OpenShift standard reserved UID for Java apps ( https://lists.openshift.redhat.com/openshift-archives/users/2016-March/msg00283.html) Notes: - We have to make the `WORKDIR` writable by the root group or otherwise jobs will fail with `AccessDeniedException` To Do: - [x] Debug and resolve issue with client mode test - [x] Consider whether to always propagate `SPARK_USER_NAME` to environment of driver and executor pods so `entrypoint.sh` can insert that into `/etc/passwd` entry - [x] Rebase once PR #23013 is merged and update documentation accordingly Built the Docker images with the new Dockerfiles that include the `USER` directives. Ran the Spark on K8S integration tests against the new images. All pass except client mode which I am currently debugging further. Also manually dropped myself into the resulting container images via `docker run` and checked `id -u` output to see that UID is as expected. Tried customising the UID from the default via the new `-u` argument to `docker-image-tool.sh` and again checked the resulting image for the correct runtime UID. cc felixcheung skonto vanzin Closes #23017 from rvesse/SPARK-26015. Authored-by: Rob Vesse Signed-off-by: Marcelo Vanzin --- bin/docker-image-tool.sh | 16 +++++++++++++--- docs/running-on-kubernetes.md | 5 +++-- .../docker/src/main/dockerfiles/spark/Dockerfile | 6 ++++++ .../main/dockerfiles/spark/bindings/R/Dockerfile | 9 +++++++++ .../dockerfiles/spark/bindings/python/Dockerfile | 9 +++++++++ .../src/main/dockerfiles/spark/entrypoint.sh | 2 +- .../integrationtest/ClientModeTestsSuite.scala | 3 ++- 7 files changed, 43 insertions(+), 7 deletions(-) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index 9f735f1148da..fbf9c9e448fd 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -146,6 +146,12 @@ function build { fi local BUILD_ARGS=(${BUILD_PARAMS}) + + # If a custom SPARK_UID was set add it to build arguments + if [ -n "$SPARK_UID" ]; then + BUILD_ARGS+=(--build-arg spark_uid=$SPARK_UID) + fi + local BINDING_BUILD_ARGS=( ${BUILD_PARAMS} --build-arg @@ -207,8 +213,10 @@ Options: -t tag Tag to apply to the built image, or to identify the image to be pushed. -m Use minikube's Docker daemon. -n Build docker image with --no-cache - -b arg Build arg to build or push the image. For multiple build args, this option needs to - be used separately for each build arg. + -u uid UID to use in the USER directive to set the user the main Spark process runs as inside the + resulting container + -b arg Build arg to build or push the image. For multiple build args, this option needs to + be used separately for each build arg. Using minikube when building images will do so directly into minikube's Docker daemon. 
There is no need to push the images into minikube in that case, they'll be automatically @@ -243,7 +251,8 @@ PYDOCKERFILE= RDOCKERFILE= NOCACHEARG= BUILD_PARAMS= -while getopts f:p:R:mr:t:nb: option +SPARK_UID= +while getopts f:p:R:mr:t:nb:u: option do case "${option}" in @@ -263,6 +272,7 @@ do fi eval $(minikube docker-env) ;; + u) SPARK_UID=${OPTARG};; esac done diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 2c01e1e7155e..5639253d52f5 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -19,9 +19,9 @@ Please see [Spark Security](security.html) and the specific advice below before ## User Identity -Images built from the project provided Dockerfiles do not contain any [`USER`](https://docs.docker.com/engine/reference/builder/#user) directives. This means that the resulting images will be running the Spark processes as `root` inside the container. On unsecured clusters this may provide an attack vector for privilege escalation and container breakout. Therefore security conscious deployments should consider providing custom images with `USER` directives specifying an unprivileged UID and GID. +Images built from the project provided Dockerfiles contain a default [`USER`](https://docs.docker.com/engine/reference/builder/#user) directive with a default UID of `185`. This means that the resulting images will be running the Spark processes as this UID inside the container. Security conscious deployments should consider providing custom images with `USER` directives specifying their desired unprivileged UID and GID. The resulting UID should include the root group in its supplementary groups in order to be able to run the Spark executables. Users building their own images with the provided `docker-image-tool.sh` script can use the `-u ` option to specify the desired UID. -Alternatively the [Pod Template](#pod-template) feature can be used to add a [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#volumes-and-file-systems) with a `runAsUser` to the pods that Spark submits. Please bear in mind that this requires cooperation from your users and as such may not be a suitable solution for shared environments. Cluster administrators should use [Pod Security Policies](https://kubernetes.io/docs/concepts/policy/pod-security-policy/#users-and-groups) if they wish to limit the users that pods may run as. +Alternatively the [Pod Template](#pod-template) feature can be used to add a [Security Context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#volumes-and-file-systems) with a `runAsUser` to the pods that Spark submits. This can be used to override the `USER` directives in the images themselves. Please bear in mind that this requires cooperation from your users and as such may not be a suitable solution for shared environments. Cluster administrators should use [Pod Security Policies](https://kubernetes.io/docs/concepts/policy/pod-security-policy/#users-and-groups) if they wish to limit the users that pods may run as. ## Volume Mounts @@ -87,6 +87,7 @@ Example usage is: $ ./bin/docker-image-tool.sh -r -t my-tag build $ ./bin/docker-image-tool.sh -r -t my-tag push ``` +This will build using the projects provided default `Dockerfiles`. To see more options available for customising the behaviour of this tool, including providing custom `Dockerfiles`, please run with the `-h` flag. By default `bin/docker-image-tool.sh` builds docker image for running JVM jobs. 
You need to opt-in to build additional language binding docker images. diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile index 89b20e144622..084304032470 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile @@ -17,6 +17,8 @@ FROM openjdk:8-alpine +ARG spark_uid=185 + # Before building the docker image, first build and make a Spark distribution following # the instructions in http://spark.apache.org/docs/latest/building-spark.html. # If this docker file is being used in the context of building your images from a Spark @@ -47,5 +49,9 @@ COPY data /opt/spark/data ENV SPARK_HOME /opt/spark WORKDIR /opt/spark/work-dir +RUN chmod g+w /opt/spark/work-dir ENTRYPOINT [ "/opt/entrypoint.sh" ] + +# Specify the User that the actual main process will run as +USER ${spark_uid} diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile index 9f67422efeb3..9ded57c65510 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile @@ -16,8 +16,14 @@ # ARG base_img +ARG spark_uid=185 + FROM $base_img WORKDIR / + +# Reset to root to run installation tasks +USER 0 + RUN mkdir ${SPARK_HOME}/R RUN apk add --no-cache R R-dev @@ -27,3 +33,6 @@ ENV R_HOME /usr/lib/R WORKDIR /opt/spark/work-dir ENTRYPOINT [ "/opt/entrypoint.sh" ] + +# Specify the User that the actual main process will run as +USER ${spark_uid} diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile index 69b6efa6149a..de1a0617b1cc 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile @@ -16,8 +16,14 @@ # ARG base_img +ARG spark_uid=185 + FROM $base_img WORKDIR / + +# Reset to root to run installation tasks +USER 0 + RUN mkdir ${SPARK_HOME}/python # TODO: Investigate running both pip and pip3 via virtualenvs RUN apk add --no-cache python && \ @@ -37,3 +43,6 @@ ENV PYTHONPATH ${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4 WORKDIR /opt/spark/work-dir ENTRYPOINT [ "/opt/entrypoint.sh" ] + +# Specify the User that the actual main process will run as +USER ${spark_uid} diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh index 2b2a4e4cf6bc..2d770075a074 100755 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh @@ -30,7 +30,7 @@ set -e # If there is no passwd entry for the container UID, attempt to create one if [ -z "$uidentry" ] ; then if [ -w /etc/passwd ] ; then - echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd + echo "$myuid:x:$myuid:$mygid:${SPARK_USER_NAME:-anonymous uid}:$SPARK_HOME:/bin/false" >> /etc/passwd else echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" fi diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ClientModeTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ClientModeTestsSuite.scala index c8bd584516ea..2720cdf74ca8 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ClientModeTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ClientModeTestsSuite.scala @@ -62,11 +62,12 @@ private[spark] trait ClientModeTestsSuite { k8sSuite: KubernetesSuite => .endMetadata() .withNewSpec() .withServiceAccountName(kubernetesTestComponents.serviceAccountName) + .withRestartPolicy("Never") .addNewContainer() .withName("spark-example") .withImage(image) .withImagePullPolicy("IfNotPresent") - .withCommand("/opt/spark/bin/run-example") + .addToArgs("/opt/spark/bin/run-example") .addToArgs("--master", s"k8s://https://kubernetes.default.svc") .addToArgs("--deploy-mode", "client") .addToArgs("--conf", s"spark.kubernetes.container.image=$image") From 9fdc7a840daa64d1302d12027fd84ea9894110a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=BA=AE?= Date: Thu, 29 Nov 2018 13:08:53 -0600 Subject: [PATCH 0022/1072] [SPARK-26158][MLLIB] fix covariance accuracy problem for DenseVector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? Enhance accuracy of the covariance logic in RowMatrix for function computeCovariance ## How was this patch tested? Unit test Accuracy test Closes #23126 from KyleLi1985/master. Authored-by: 李亮 Signed-off-by: Sean Owen --- .../mllib/linalg/distributed/RowMatrix.scala | 97 ++++++++++++++----- .../apache/spark/ml/feature/JavaPCASuite.java | 3 +- .../linalg/distributed/RowMatrixSuite.scala | 14 +++ 3 files changed, 90 insertions(+), 24 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index c12b751bfb8e..ff02e5dd3c25 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -128,6 +128,77 @@ class RowMatrix @Since("1.0.0") ( RowMatrix.triuToFull(n, GU.data) } + private def computeDenseVectorCovariance(mean: Vector, n: Int, m: Long): Matrix = { + + val bc = rows.context.broadcast(mean) + + // Computes n*(n+1)/2, avoiding overflow in the multiplication. 
+ // This succeeds when n <= 65535, which is checked above + val nt = if (n % 2 == 0) ((n / 2) * (n + 1)) else (n * ((n + 1) / 2)) + + val MU = rows.treeAggregate(new BDV[Double](nt))( + seqOp = (U, v) => { + + val n = v.size + val na = Array.ofDim[Double](n) + val means = bc.value + + val ta = v.toArray + for (index <- 0 until n) { + na(index) = ta(index) - means(index) + } + + BLAS.spr(1.0, new DenseVector(na), U.data) + U + }, combOp = (U1, U2) => U1 += U2) + + bc.destroy() + + val M = RowMatrix.triuToFull(n, MU.data).asBreeze + + var i = 0 + var j = 0 + val m1 = m - 1.0 + while (i < n) { + j = i + while (j < n) { + val Mij = M(i, j) / m1 + M(i, j) = Mij + M(j, i) = Mij + j += 1 + } + i += 1 + } + + Matrices.fromBreeze(M) + } + + private def computeSparseVectorCovariance(mean: Vector, n: Int, m: Long): Matrix = { + + // We use the formula Cov(X, Y) = E[X * Y] - E[X] E[Y], which is not accurate if E[X * Y] is + // large but Cov(X, Y) is small, but it is good for sparse computation. + // TODO: find a fast and stable way for sparse data. + val G = computeGramianMatrix().asBreeze + + var i = 0 + var j = 0 + val m1 = m - 1.0 + var alpha = 0.0 + while (i < n) { + alpha = m / m1 * mean(i) + j = i + while (j < n) { + val Gij = G(i, j) / m1 - alpha * mean(j) + G(i, j) = Gij + G(j, i) = Gij + j += 1 + } + i += 1 + } + + Matrices.fromBreeze(G) + } + private def checkNumColumns(cols: Int): Unit = { if (cols > 65535) { throw new IllegalArgumentException(s"Argument with more than 65535 cols: $cols") @@ -337,29 +408,11 @@ class RowMatrix @Since("1.0.0") ( " Cannot compute the covariance of a RowMatrix with <= 1 row.") val mean = summary.mean - // We use the formula Cov(X, Y) = E[X * Y] - E[X] E[Y], which is not accurate if E[X * Y] is - // large but Cov(X, Y) is small, but it is good for sparse computation. - // TODO: find a fast and stable way for sparse data. 
- - val G = computeGramianMatrix().asBreeze - - var i = 0 - var j = 0 - val m1 = m - 1.0 - var alpha = 0.0 - while (i < n) { - alpha = m / m1 * mean(i) - j = i - while (j < n) { - val Gij = G(i, j) / m1 - alpha * mean(j) - G(i, j) = Gij - G(j, i) = Gij - j += 1 - } - i += 1 + if (rows.first().isInstanceOf[DenseVector]) { + computeDenseVectorCovariance(mean, n, m) + } else { + computeSparseVectorCovariance(mean, n, m) } - - Matrices.fromBreeze(G) } /** diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java index 683ceffeaed0..2e177edf2a5c 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java @@ -28,7 +28,6 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.linalg.Vector; import org.apache.spark.ml.linalg.Vectors; -import org.apache.spark.mllib.linalg.DenseVector; import org.apache.spark.mllib.linalg.Matrix; import org.apache.spark.mllib.linalg.distributed.RowMatrix; import org.apache.spark.sql.Dataset; @@ -67,7 +66,7 @@ public void testPCA() { JavaRDD dataRDD = jsc.parallelize(points, 2); RowMatrix mat = new RowMatrix(dataRDD.map( - (Vector vector) -> (org.apache.spark.mllib.linalg.Vector) new DenseVector(vector.toArray()) + (Vector vector) -> org.apache.spark.mllib.linalg.Vectors.fromML(vector) ).rdd()); Matrix pc = mat.computePrincipalComponents(3); diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index 7c9e14f8cee7..a4ca4f0a80fa 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ -266,6 +266,20 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { } } + test("dense vector covariance accuracy (SPARK-26158)") { + val denseData = Seq( + Vectors.dense(100000.000004, 199999.999999), + Vectors.dense(100000.000012, 200000.000002), + Vectors.dense(99999.9999931, 200000.000003), + Vectors.dense(99999.9999977, 200000.000001) + ) + val denseMat = new RowMatrix(sc.parallelize(denseData, 2)) + + val result = denseMat.computeCovariance() + val expected = breeze.linalg.cov(denseMat.toBreeze()) + assert(closeToZero(abs(expected) - abs(result.asBreeze.asInstanceOf[BDM[Double]]))) + } + test("compute covariance") { for (mat <- Seq(denseMat, sparseMat)) { val result = mat.computeCovariance() From cb368f2c2964797d7313d3a4151e2352ff7847a9 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Thu, 29 Nov 2018 12:09:30 -0800 Subject: [PATCH 0023/1072] [SPARK-26142] followup: Move sql shuffle read metrics relatives to SQLShuffleMetricsReporter ## What changes were proposed in this pull request? Follow up for https://github.com/apache/spark/pull/23128, move sql read metrics relatives to `SQLShuffleMetricsReporter`, in order to put sql shuffle read metrics relatives closer and avoid possible problem about forgetting update SQLShuffleMetricsReporter while new metrics added by others. ## How was this patch tested? Existing tests. Closes #23175 from xuanyuanking/SPARK-26142-follow. 
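To illustrate the consolidation described above with a minimal, dependency-free sketch: the point of moving everything into `SQLShuffleMetricsReporter` is that the metric keys, the factory that creates every metric, and the reporter that consumes them live in one place, so a newly added metric cannot be silently missed. The names below (`MetricKeys`, `SimpleReporter`) are hypothetical stand-ins, and plain `LongAdder`s stand in for `SQLMetric`; this is not Spark's actual API.

```
import java.util.concurrent.atomic.LongAdder

// All keys and the factory that creates every metric live together,
// mirroring the "single owner" layout of the refactoring above.
object MetricKeys {
  val RemoteBlocksFetched = "remoteBlocksFetched"
  val LocalBlocksFetched = "localBlocksFetched"
  val RecordsRead = "recordsRead"

  def createAll(): Map[String, LongAdder] =
    Seq(RemoteBlocksFetched, LocalBlocksFetched, RecordsRead)
      .map(key => key -> new LongAdder())
      .toMap
}

// The consumer resolves metrics through the same shared keys, so it can only
// reference metrics that the factory above actually creates.
class SimpleReporter(metrics: Map[String, LongAdder]) {
  def incRecordsRead(v: Long): Unit = metrics(MetricKeys.RecordsRead).add(v)
}

object MetricKeysDemo extends App {
  val metrics = MetricKeys.createAll()
  new SimpleReporter(metrics).incRecordsRead(42L)
  println(metrics(MetricKeys.RecordsRead).sum()) // prints 42
}
```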
Authored-by: Yuanjian Li Signed-off-by: Reynold Xin --- .../exchange/ShuffleExchangeExec.scala | 4 +- .../apache/spark/sql/execution/limit.scala | 6 +-- .../sql/execution/metric/SQLMetrics.scala | 20 -------- .../metric/SQLShuffleMetricsReporter.scala | 50 +++++++++++++++---- .../execution/UnsafeRowSerializerSuite.scala | 4 +- 5 files changed, 47 insertions(+), 37 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index 8938d93da90e..c9ca395bceaa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, BoundReference, Uns import org.apache.spark.sql.catalyst.expressions.codegen.LazilyGeneratedOrdering import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.execution.metric.{SQLMetrics, SQLShuffleMetricsReporter} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.util.MutablePair @@ -49,7 +49,7 @@ case class ShuffleExchangeExec( override lazy val metrics = Map( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size") - ) ++ SQLMetrics.getShuffleReadMetrics(sparkContext) + ) ++ SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) override def nodeName: String = { val extraInfo = coordinator match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index ea845da8438f..e9ab7cd138d9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGe import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec -import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.execution.metric.SQLShuffleMetricsReporter /** * Take the first `limit` elements and collect them to a single partition. 
@@ -38,7 +38,7 @@ case class CollectLimitExec(limit: Int, child: SparkPlan) extends UnaryExecNode override def outputPartitioning: Partitioning = SinglePartition override def executeCollect(): Array[InternalRow] = child.executeTake(limit) private val serializer: Serializer = new UnsafeRowSerializer(child.output.size) - override lazy val metrics = SQLMetrics.getShuffleReadMetrics(sparkContext) + override lazy val metrics = SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) protected override def doExecute(): RDD[InternalRow] = { val locallyLimited = child.execute().mapPartitionsInternal(_.take(limit)) val shuffled = new ShuffledRowRDD( @@ -154,7 +154,7 @@ case class TakeOrderedAndProjectExec( private val serializer: Serializer = new UnsafeRowSerializer(child.output.size) - override lazy val metrics = SQLMetrics.getShuffleReadMetrics(sparkContext) + override lazy val metrics = SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) protected override def doExecute(): RDD[InternalRow] = { val ord = new LazilyGeneratedOrdering(sortOrder, child.output) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala index 0b5ee3a5e057..cbf707f4a9cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala @@ -82,14 +82,6 @@ object SQLMetrics { private val baseForAvgMetric: Int = 10 - val REMOTE_BLOCKS_FETCHED = "remoteBlocksFetched" - val LOCAL_BLOCKS_FETCHED = "localBlocksFetched" - val REMOTE_BYTES_READ = "remoteBytesRead" - val REMOTE_BYTES_READ_TO_DISK = "remoteBytesReadToDisk" - val LOCAL_BYTES_READ = "localBytesRead" - val FETCH_WAIT_TIME = "fetchWaitTime" - val RECORDS_READ = "recordsRead" - /** * Converts a double value to long value by multiplying a base integer, so we can store it in * `SQLMetrics`. It only works for average metrics. When showing the metrics on UI, we restore @@ -202,16 +194,4 @@ object SQLMetrics { SparkListenerDriverAccumUpdates(executionId.toLong, metrics.map(m => m.id -> m.value))) } } - - /** - * Create all shuffle read relative metrics and return the Map. - */ - def getShuffleReadMetrics(sc: SparkContext): Map[String, SQLMetric] = Map( - REMOTE_BLOCKS_FETCHED -> createMetric(sc, "remote blocks fetched"), - LOCAL_BLOCKS_FETCHED -> createMetric(sc, "local blocks fetched"), - REMOTE_BYTES_READ -> createSizeMetric(sc, "remote bytes read"), - REMOTE_BYTES_READ_TO_DISK -> createSizeMetric(sc, "remote bytes read to disk"), - LOCAL_BYTES_READ -> createSizeMetric(sc, "local bytes read"), - FETCH_WAIT_TIME -> createTimingMetric(sc, "fetch wait time"), - RECORDS_READ -> createMetric(sc, "records read")) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala index 542141ea4b4e..780f0d762229 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala @@ -17,24 +17,32 @@ package org.apache.spark.sql.execution.metric +import org.apache.spark.SparkContext import org.apache.spark.executor.TempShuffleReadMetrics /** * A shuffle metrics reporter for SQL exchange operators. * @param tempMetrics [[TempShuffleReadMetrics]] created in TaskContext. 
* @param metrics All metrics in current SparkPlan. This param should not empty and - * contains all shuffle metrics defined in [[SQLMetrics.getShuffleReadMetrics]]. + * contains all shuffle metrics defined in createShuffleReadMetrics. */ private[spark] class SQLShuffleMetricsReporter( - tempMetrics: TempShuffleReadMetrics, - metrics: Map[String, SQLMetric]) extends TempShuffleReadMetrics { - private[this] val _remoteBlocksFetched = metrics(SQLMetrics.REMOTE_BLOCKS_FETCHED) - private[this] val _localBlocksFetched = metrics(SQLMetrics.LOCAL_BLOCKS_FETCHED) - private[this] val _remoteBytesRead = metrics(SQLMetrics.REMOTE_BYTES_READ) - private[this] val _remoteBytesReadToDisk = metrics(SQLMetrics.REMOTE_BYTES_READ_TO_DISK) - private[this] val _localBytesRead = metrics(SQLMetrics.LOCAL_BYTES_READ) - private[this] val _fetchWaitTime = metrics(SQLMetrics.FETCH_WAIT_TIME) - private[this] val _recordsRead = metrics(SQLMetrics.RECORDS_READ) + tempMetrics: TempShuffleReadMetrics, + metrics: Map[String, SQLMetric]) extends TempShuffleReadMetrics { + private[this] val _remoteBlocksFetched = + metrics(SQLShuffleMetricsReporter.REMOTE_BLOCKS_FETCHED) + private[this] val _localBlocksFetched = + metrics(SQLShuffleMetricsReporter.LOCAL_BLOCKS_FETCHED) + private[this] val _remoteBytesRead = + metrics(SQLShuffleMetricsReporter.REMOTE_BYTES_READ) + private[this] val _remoteBytesReadToDisk = + metrics(SQLShuffleMetricsReporter.REMOTE_BYTES_READ_TO_DISK) + private[this] val _localBytesRead = + metrics(SQLShuffleMetricsReporter.LOCAL_BYTES_READ) + private[this] val _fetchWaitTime = + metrics(SQLShuffleMetricsReporter.FETCH_WAIT_TIME) + private[this] val _recordsRead = + metrics(SQLShuffleMetricsReporter.RECORDS_READ) override def incRemoteBlocksFetched(v: Long): Unit = { _remoteBlocksFetched.add(v) @@ -65,3 +73,25 @@ private[spark] class SQLShuffleMetricsReporter( tempMetrics.incRecordsRead(v) } } + +private[spark] object SQLShuffleMetricsReporter { + val REMOTE_BLOCKS_FETCHED = "remoteBlocksFetched" + val LOCAL_BLOCKS_FETCHED = "localBlocksFetched" + val REMOTE_BYTES_READ = "remoteBytesRead" + val REMOTE_BYTES_READ_TO_DISK = "remoteBytesReadToDisk" + val LOCAL_BYTES_READ = "localBytesRead" + val FETCH_WAIT_TIME = "fetchWaitTime" + val RECORDS_READ = "recordsRead" + + /** + * Create all shuffle read relative metrics and return the Map. 
+ */ + def createShuffleReadMetrics(sc: SparkContext): Map[String, SQLMetric] = Map( + REMOTE_BLOCKS_FETCHED -> SQLMetrics.createMetric(sc, "remote blocks fetched"), + LOCAL_BLOCKS_FETCHED -> SQLMetrics.createMetric(sc, "local blocks fetched"), + REMOTE_BYTES_READ -> SQLMetrics.createSizeMetric(sc, "remote bytes read"), + REMOTE_BYTES_READ_TO_DISK -> SQLMetrics.createSizeMetric(sc, "remote bytes read to disk"), + LOCAL_BYTES_READ -> SQLMetrics.createSizeMetric(sc, "local bytes read"), + FETCH_WAIT_TIME -> SQLMetrics.createTimingMetric(sc, "fetch wait time"), + RECORDS_READ -> SQLMetrics.createMetric(sc, "records read")) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala index 96b3aa5ee75b..1ad5713ab8ae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{LocalSparkSession, Row, SparkSession} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow} -import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.execution.metric.SQLShuffleMetricsReporter import org.apache.spark.sql.types._ import org.apache.spark.storage.ShuffleBlockId import org.apache.spark.util.collection.ExternalSorter @@ -140,7 +140,7 @@ class UnsafeRowSerializerSuite extends SparkFunSuite with LocalSparkSession { new UnsafeRowSerializer(2)) val shuffled = new ShuffledRowRDD( dependency, - SQLMetrics.getShuffleReadMetrics(spark.sparkContext)) + SQLShuffleMetricsReporter.createShuffleReadMetrics(spark.sparkContext)) shuffled.count() } } From 59741887e272be92ebd6e61783f99f7d8fc05456 Mon Sep 17 00:00:00 2001 From: Wing Yew Poon Date: Thu, 29 Nov 2018 14:56:34 -0600 Subject: [PATCH 0024/1072] [SPARK-25905][CORE] When getting a remote block, avoid forcing a conversion to a ChunkedByteBuffer ## What changes were proposed in this pull request? In `BlockManager`, `getRemoteValues` gets a `ChunkedByteBuffer` (by calling `getRemoteBytes`) and creates an `InputStream` from it. `getRemoteBytes`, in turn, gets a `ManagedBuffer` and converts it to a `ChunkedByteBuffer`. Instead, expose a `getRemoteManagedBuffer` method so `getRemoteValues` can just get this `ManagedBuffer` and use its `InputStream`. When reading a remote cache block from disk, this reduces heap memory usage significantly. Retain `getRemoteBytes` for other callers. ## How was this patch tested? Imran Rashid wrote an application (https://github.com/squito/spark_2gb_test/blob/master/src/main/scala/com/cloudera/sparktest/LargeBlocks.scala), that among other things, tests reading remote cache blocks. I ran this application, using 2500MB blocks, to test reading a cache block on disk. Without this change, with `--executor-memory 5g`, the test fails with `java.lang.OutOfMemoryError: Java heap space`. With the change, the test passes with `--executor-memory 2g`. I also ran the unit tests in core. In particular, `DistributedSuite` has a set of tests that exercise the `getRemoteValues` code path. `BlockManagerSuite` has several tests that call `getRemoteBytes`; I left these unchanged, so `getRemoteBytes` still gets exercised. Closes #23058 from wypoon/SPARK-25905. 
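To make the memory argument above concrete, here is a minimal sketch (not Spark code; a temporary file stands in for a remote block fetched to disk) contrasting materializing a whole payload on the heap with consuming it incrementally through an `InputStream`, which is the shape of the change to `getRemoteValues`:

```
import java.io.{BufferedInputStream, FileInputStream, InputStream}
import java.nio.file.Files

object StreamVsMaterialize extends App {
  // Pretend this 4 MiB file is a cached block that was fetched to disk.
  val path = Files.createTempFile("block", ".bin")
  Files.write(path, Array.fill[Byte](4 * 1024 * 1024)(1.toByte))

  // Materializing: the entire block sits on the heap before deserialization starts.
  val allBytes: Array[Byte] = Files.readAllBytes(path)
  println(s"materialized ${allBytes.length} bytes on the heap")

  // Streaming: only a small buffer is resident while the consumer reads.
  def withStream[T](f: InputStream => T): T = {
    val in = new BufferedInputStream(new FileInputStream(path.toFile))
    try f(in) finally in.close()
  }
  val total = withStream { in =>
    val buf = new Array[Byte](8192)
    var count = 0L
    var n = in.read(buf)
    while (n != -1) { count += n; n = in.read(buf) }
    count
  }
  println(s"streamed $total bytes through an 8 KiB buffer")

  Files.delete(path)
}
```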
Authored-by: Wing Yew Poon Signed-off-by: Imran Rashid --- .../apache/spark/storage/BlockManager.scala | 43 ++++++++++++------- .../spark/util/io/ChunkedByteBuffer.scala | 2 - .../org/apache/spark/DistributedSuite.scala | 2 +- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 1b617297e0a3..1dfbc6effb34 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -692,9 +692,9 @@ private[spark] class BlockManager( */ private def getRemoteValues[T: ClassTag](blockId: BlockId): Option[BlockResult] = { val ct = implicitly[ClassTag[T]] - getRemoteBytes(blockId).map { data => + getRemoteManagedBuffer(blockId).map { data => val values = - serializerManager.dataDeserializeStream(blockId, data.toInputStream(dispose = true))(ct) + serializerManager.dataDeserializeStream(blockId, data.createInputStream())(ct) new BlockResult(values, DataReadMethod.Network, data.size) } } @@ -717,13 +717,9 @@ private[spark] class BlockManager( } /** - * Get block from remote block managers as serialized bytes. + * Get block from remote block managers as a ManagedBuffer. */ - def getRemoteBytes(blockId: BlockId): Option[ChunkedByteBuffer] = { - // TODO SPARK-25905 if we change this method to return the ManagedBuffer, then getRemoteValues - // could just use the inputStream on the temp file, rather than reading the file into memory. - // Until then, replication can cause the process to use too much memory and get killed - // even though we've read the data to disk. + private def getRemoteManagedBuffer(blockId: BlockId): Option[ManagedBuffer] = { logDebug(s"Getting remote block $blockId") require(blockId != null, "BlockId is null") var runningFailureCount = 0 @@ -788,14 +784,13 @@ private[spark] class BlockManager( } if (data != null) { - // SPARK-24307 undocumented "escape-hatch" in case there are any issues in converting to - // ChunkedByteBuffer, to go back to old code-path. Can be removed post Spark 2.4 if - // new path is stable. - if (remoteReadNioBufferConversion) { - return Some(new ChunkedByteBuffer(data.nioByteBuffer())) - } else { - return Some(ChunkedByteBuffer.fromManagedBuffer(data)) - } + // If the ManagedBuffer is a BlockManagerManagedBuffer, the disposal of the + // byte buffers backing it may need to be handled after reading the bytes. + // In this case, since we just fetched the bytes remotely, we do not have + // a BlockManagerManagedBuffer. The assert here is to ensure that this holds + // true (or the disposal is handled). + assert(!data.isInstanceOf[BlockManagerManagedBuffer]) + return Some(data) } logDebug(s"The value of block $blockId is null") } @@ -803,6 +798,22 @@ private[spark] class BlockManager( None } + /** + * Get block from remote block managers as serialized bytes. + */ + def getRemoteBytes(blockId: BlockId): Option[ChunkedByteBuffer] = { + getRemoteManagedBuffer(blockId).map { data => + // SPARK-24307 undocumented "escape-hatch" in case there are any issues in converting to + // ChunkedByteBuffer, to go back to old code-path. Can be removed post Spark 2.4 if + // new path is stable. + if (remoteReadNioBufferConversion) { + new ChunkedByteBuffer(data.nioByteBuffer()) + } else { + ChunkedByteBuffer.fromManagedBuffer(data) + } + } + } + /** * Get a block from the block manager (either local or remote). 
* diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala index 128d6ff8cd74..2c3730de08b5 100644 --- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala @@ -172,8 +172,6 @@ private[spark] class ChunkedByteBuffer(var chunks: Array[ByteBuffer]) { private[spark] object ChunkedByteBuffer { - - // TODO SPARK-25905 eliminate this method if we switch BlockManager to getting InputStreams def fromManagedBuffer(data: ManagedBuffer): ChunkedByteBuffer = { data match { case f: FileSegmentManagedBuffer => diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala index 629a323042ff..4083b20c2359 100644 --- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala @@ -195,7 +195,7 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex new ChunkedByteBuffer(bytes.nioByteBuffer()).toInputStream())(data.elementClassTag).toList assert(deserialized === (1 to 100).toList) } - // This will exercise the getRemoteBytes / getRemoteValues code paths: + // This will exercise the getRemoteValues code path: assert(blockIds.flatMap(id => blockManager.get[Int](id).get.data).toSet === (1 to 1000).toSet) } From f97326bcdba532eabf25d4899b13709e9af2bfea Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 30 Nov 2018 08:27:55 +0800 Subject: [PATCH 0025/1072] [SPARK-25977][SQL] Parsing decimals from CSV using locale ## What changes were proposed in this pull request? In this PR, I propose using the locale option to parse decimals from CSV input. After the changes, `UnivocityParser` converts the input string to `BigDecimal` and then to Spark's Decimal by using `java.text.DecimalFormat`. ## How was this patch tested? Added a test for the `en-US`, `ko-KR`, `ru-RU`, `de-DE` locales. Closes #22979 from MaxGekk/decimal-parsing-locale. Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: hyukjinkwon --- .../spark/sql/catalyst/csv/CSVExprUtils.scala | 4 + .../sql/catalyst/csv/CSVInferSchema.scala | 72 ++++----- .../sql/catalyst/csv/UnivocityParser.scala | 8 +- .../catalyst/expressions/csvExpressions.scala | 5 +- .../catalyst/csv/CSVInferSchemaSuite.scala | 147 ++++++++++++------ .../catalyst/csv/UnivocityParserSuite.scala | 22 ++- .../datasources/csv/CSVDataSource.scala | 4 +- 7 files changed, 168 insertions(+), 94 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala index bbe27831f01d..6c982a1de9a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala @@ -17,6 +17,10 @@ package org.apache.spark.sql.catalyst.csv +import java.math.BigDecimal +import java.text.{DecimalFormat, DecimalFormatSymbols, ParsePosition} +import java.util.Locale + object CSVExprUtils { /** * Filter ignorable rows for CSV iterator (lines empty and starting with `comment`).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 799e9994451b..94cb4b114e6b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -17,16 +17,19 @@ package org.apache.spark.sql.catalyst.csv -import java.math.BigDecimal - import scala.util.control.Exception.allCatch import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.analysis.TypeCoercion +import org.apache.spark.sql.catalyst.expressions.ExprUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ -object CSVInferSchema { +class CSVInferSchema(options: CSVOptions) extends Serializable { + + private val decimalParser = { + ExprUtils.getDecimalParser(options.locale) + } /** * Similar to the JSON schema inference @@ -36,14 +39,13 @@ object CSVInferSchema { */ def infer( tokenRDD: RDD[Array[String]], - header: Array[String], - options: CSVOptions): StructType = { + header: Array[String]): StructType = { val fields = if (options.inferSchemaFlag) { val startType: Array[DataType] = Array.fill[DataType](header.length)(NullType) val rootTypes: Array[DataType] = - tokenRDD.aggregate(startType)(inferRowType(options), mergeRowTypes) + tokenRDD.aggregate(startType)(inferRowType, mergeRowTypes) - toStructFields(rootTypes, header, options) + toStructFields(rootTypes, header) } else { // By default fields are assumed to be StringType header.map(fieldName => StructField(fieldName, StringType, nullable = true)) @@ -54,8 +56,7 @@ object CSVInferSchema { def toStructFields( fieldTypes: Array[DataType], - header: Array[String], - options: CSVOptions): Array[StructField] = { + header: Array[String]): Array[StructField] = { header.zip(fieldTypes).map { case (thisHeader, rootType) => val dType = rootType match { case _: NullType => StringType @@ -65,11 +66,10 @@ object CSVInferSchema { } } - def inferRowType(options: CSVOptions) - (rowSoFar: Array[DataType], next: Array[String]): Array[DataType] = { + def inferRowType(rowSoFar: Array[DataType], next: Array[String]): Array[DataType] = { var i = 0 while (i < math.min(rowSoFar.length, next.length)) { // May have columns on right missing. - rowSoFar(i) = inferField(rowSoFar(i), next(i), options) + rowSoFar(i) = inferField(rowSoFar(i), next(i)) i+=1 } rowSoFar @@ -85,20 +85,20 @@ object CSVInferSchema { * Infer type of string field. Given known type Double, and a string "1", there is no * point checking if it is an Int, as the final type must be Double or higher. */ - def inferField(typeSoFar: DataType, field: String, options: CSVOptions): DataType = { + def inferField(typeSoFar: DataType, field: String): DataType = { if (field == null || field.isEmpty || field == options.nullValue) { typeSoFar } else { typeSoFar match { - case NullType => tryParseInteger(field, options) - case IntegerType => tryParseInteger(field, options) - case LongType => tryParseLong(field, options) + case NullType => tryParseInteger(field) + case IntegerType => tryParseInteger(field) + case LongType => tryParseLong(field) case _: DecimalType => // DecimalTypes have different precisions and scales, so we try to find the common type. 
- compatibleType(typeSoFar, tryParseDecimal(field, options)).getOrElse(StringType) - case DoubleType => tryParseDouble(field, options) - case TimestampType => tryParseTimestamp(field, options) - case BooleanType => tryParseBoolean(field, options) + compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType) + case DoubleType => tryParseDouble(field) + case TimestampType => tryParseTimestamp(field) + case BooleanType => tryParseBoolean(field) case StringType => StringType case other: DataType => throw new UnsupportedOperationException(s"Unexpected data type $other") @@ -106,30 +106,30 @@ object CSVInferSchema { } } - private def isInfOrNan(field: String, options: CSVOptions): Boolean = { + private def isInfOrNan(field: String): Boolean = { field == options.nanValue || field == options.negativeInf || field == options.positiveInf } - private def tryParseInteger(field: String, options: CSVOptions): DataType = { + private def tryParseInteger(field: String): DataType = { if ((allCatch opt field.toInt).isDefined) { IntegerType } else { - tryParseLong(field, options) + tryParseLong(field) } } - private def tryParseLong(field: String, options: CSVOptions): DataType = { + private def tryParseLong(field: String): DataType = { if ((allCatch opt field.toLong).isDefined) { LongType } else { - tryParseDecimal(field, options) + tryParseDecimal(field) } } - private def tryParseDecimal(field: String, options: CSVOptions): DataType = { + private def tryParseDecimal(field: String): DataType = { val decimalTry = allCatch opt { - // `BigDecimal` conversion can fail when the `field` is not a form of number. - val bigDecimal = new BigDecimal(field) + // The conversion can fail when the `field` is not a form of number. + val bigDecimal = decimalParser(field) // Because many other formats do not support decimal, it reduces the cases for // decimals by disallowing values having scale (eg. `1.1`). if (bigDecimal.scale <= 0) { @@ -138,21 +138,21 @@ object CSVInferSchema { // 2. scale is bigger than precision. DecimalType(bigDecimal.precision, bigDecimal.scale) } else { - tryParseDouble(field, options) + tryParseDouble(field) } } - decimalTry.getOrElse(tryParseDouble(field, options)) + decimalTry.getOrElse(tryParseDouble(field)) } - private def tryParseDouble(field: String, options: CSVOptions): DataType = { - if ((allCatch opt field.toDouble).isDefined || isInfOrNan(field, options)) { + private def tryParseDouble(field: String): DataType = { + if ((allCatch opt field.toDouble).isDefined || isInfOrNan(field)) { DoubleType } else { - tryParseTimestamp(field, options) + tryParseTimestamp(field) } } - private def tryParseTimestamp(field: String, options: CSVOptions): DataType = { + private def tryParseTimestamp(field: String): DataType = { // This case infers a custom `dataFormat` is set. if ((allCatch opt options.timestampFormat.parse(field)).isDefined) { TimestampType @@ -160,11 +160,11 @@ object CSVInferSchema { // We keep this for backwards compatibility. 
TimestampType } else { - tryParseBoolean(field, options) + tryParseBoolean(field) } } - private def tryParseBoolean(field: String, options: CSVOptions): DataType = { + private def tryParseBoolean(field: String): DataType = { if ((allCatch opt field.toBoolean).isDefined) { BooleanType } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index ed196935e357..85e129224c91 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.catalyst.csv import java.io.InputStream -import java.math.BigDecimal import scala.util.Try import scala.util.control.NonFatal @@ -27,7 +26,7 @@ import com.univocity.parsers.csv.CsvParser import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.GenericInternalRow +import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericInternalRow} import org.apache.spark.sql.catalyst.util.{BadRecordException, DateTimeUtils, FailureSafeParser} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -104,6 +103,8 @@ class UnivocityParser( requiredSchema.map(f => makeConverter(f.name, f.dataType, f.nullable, options)).toArray } + private val decimalParser = ExprUtils.getDecimalParser(options.locale) + /** * Create a converter which converts the string value to a value according to a desired type. * Currently, we do not support complex types (`ArrayType`, `MapType`, `StructType`). @@ -149,8 +150,7 @@ class UnivocityParser( case dt: DecimalType => (d: String) => nullSafeDatum(d, name, nullable, options) { datum => - val value = new BigDecimal(datum.replaceAll(",", "")) - Decimal(value, dt.precision, dt.scale) + Decimal(decimalParser(datum), dt.precision, dt.scale) } case _: TimestampType => (d: String) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index 1e4e1c663c90..83b0299bac44 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -180,8 +180,9 @@ case class SchemaOfCsv( val header = row.zipWithIndex.map { case (_, index) => s"_c$index" } val startType: Array[DataType] = Array.fill[DataType](header.length)(NullType) - val fieldTypes = CSVInferSchema.inferRowType(parsedOptions)(startType, row) - val st = StructType(CSVInferSchema.toStructFields(fieldTypes, header, parsedOptions)) + val inferSchema = new CSVInferSchema(parsedOptions) + val fieldTypes = inferSchema.inferRowType(startType, row) + val st = StructType(inferSchema.toStructFields(fieldTypes, header)) UTF8String.fromString(st.catalogString) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala index 651846d2ebcb..1a020e67a75b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala @@ -17,126 +17,175 @@ package 
org.apache.spark.sql.catalyst.csv +import java.text.{DecimalFormat, DecimalFormatSymbols} +import java.util.Locale + import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -class CSVInferSchemaSuite extends SparkFunSuite { +class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper { test("String fields types are inferred correctly from null types") { val options = new CSVOptions(Map.empty[String, String], false, "GMT") - assert(CSVInferSchema.inferField(NullType, "", options) == NullType) - assert(CSVInferSchema.inferField(NullType, null, options) == NullType) - assert(CSVInferSchema.inferField(NullType, "100000000000", options) == LongType) - assert(CSVInferSchema.inferField(NullType, "60", options) == IntegerType) - assert(CSVInferSchema.inferField(NullType, "3.5", options) == DoubleType) - assert(CSVInferSchema.inferField(NullType, "test", options) == StringType) - assert(CSVInferSchema.inferField(NullType, "2015-08-20 15:57:00", options) == TimestampType) - assert(CSVInferSchema.inferField(NullType, "True", options) == BooleanType) - assert(CSVInferSchema.inferField(NullType, "FAlSE", options) == BooleanType) + val inferSchema = new CSVInferSchema(options) + + assert(inferSchema.inferField(NullType, "") == NullType) + assert(inferSchema.inferField(NullType, null) == NullType) + assert(inferSchema.inferField(NullType, "100000000000") == LongType) + assert(inferSchema.inferField(NullType, "60") == IntegerType) + assert(inferSchema.inferField(NullType, "3.5") == DoubleType) + assert(inferSchema.inferField(NullType, "test") == StringType) + assert(inferSchema.inferField(NullType, "2015-08-20 15:57:00") == TimestampType) + assert(inferSchema.inferField(NullType, "True") == BooleanType) + assert(inferSchema.inferField(NullType, "FAlSE") == BooleanType) val textValueOne = Long.MaxValue.toString + "0" val decimalValueOne = new java.math.BigDecimal(textValueOne) val expectedTypeOne = DecimalType(decimalValueOne.precision, decimalValueOne.scale) - assert(CSVInferSchema.inferField(NullType, textValueOne, options) == expectedTypeOne) + assert(inferSchema.inferField(NullType, textValueOne) == expectedTypeOne) } test("String fields types are inferred correctly from other types") { val options = new CSVOptions(Map.empty[String, String], false, "GMT") - assert(CSVInferSchema.inferField(LongType, "1.0", options) == DoubleType) - assert(CSVInferSchema.inferField(LongType, "test", options) == StringType) - assert(CSVInferSchema.inferField(IntegerType, "1.0", options) == DoubleType) - assert(CSVInferSchema.inferField(DoubleType, null, options) == DoubleType) - assert(CSVInferSchema.inferField(DoubleType, "test", options) == StringType) - assert(CSVInferSchema.inferField(LongType, "2015-08-20 14:57:00", options) == TimestampType) - assert(CSVInferSchema.inferField(DoubleType, "2015-08-20 15:57:00", options) == TimestampType) - assert(CSVInferSchema.inferField(LongType, "True", options) == BooleanType) - assert(CSVInferSchema.inferField(IntegerType, "FALSE", options) == BooleanType) - assert(CSVInferSchema.inferField(TimestampType, "FALSE", options) == BooleanType) + val inferSchema = new CSVInferSchema(options) + + assert(inferSchema.inferField(LongType, "1.0") == DoubleType) + assert(inferSchema.inferField(LongType, "test") == StringType) + assert(inferSchema.inferField(IntegerType, "1.0") == DoubleType) + assert(inferSchema.inferField(DoubleType, null) == DoubleType) + 
assert(inferSchema.inferField(DoubleType, "test") == StringType) + assert(inferSchema.inferField(LongType, "2015-08-20 14:57:00") == TimestampType) + assert(inferSchema.inferField(DoubleType, "2015-08-20 15:57:00") == TimestampType) + assert(inferSchema.inferField(LongType, "True") == BooleanType) + assert(inferSchema.inferField(IntegerType, "FALSE") == BooleanType) + assert(inferSchema.inferField(TimestampType, "FALSE") == BooleanType) val textValueOne = Long.MaxValue.toString + "0" val decimalValueOne = new java.math.BigDecimal(textValueOne) val expectedTypeOne = DecimalType(decimalValueOne.precision, decimalValueOne.scale) - assert(CSVInferSchema.inferField(IntegerType, textValueOne, options) == expectedTypeOne) + assert(inferSchema.inferField(IntegerType, textValueOne) == expectedTypeOne) } test("Timestamp field types are inferred correctly via custom data format") { var options = new CSVOptions(Map("timestampFormat" -> "yyyy-mm"), false, "GMT") - assert(CSVInferSchema.inferField(TimestampType, "2015-08", options) == TimestampType) + var inferSchema = new CSVInferSchema(options) + + assert(inferSchema.inferField(TimestampType, "2015-08") == TimestampType) + options = new CSVOptions(Map("timestampFormat" -> "yyyy"), false, "GMT") - assert(CSVInferSchema.inferField(TimestampType, "2015", options) == TimestampType) + inferSchema = new CSVInferSchema(options) + assert(inferSchema.inferField(TimestampType, "2015") == TimestampType) } test("Timestamp field types are inferred correctly from other types") { val options = new CSVOptions(Map.empty[String, String], false, "GMT") - assert(CSVInferSchema.inferField(IntegerType, "2015-08-20 14", options) == StringType) - assert(CSVInferSchema.inferField(DoubleType, "2015-08-20 14:10", options) == StringType) - assert(CSVInferSchema.inferField(LongType, "2015-08 14:49:00", options) == StringType) + val inferSchema = new CSVInferSchema(options) + + assert(inferSchema.inferField(IntegerType, "2015-08-20 14") == StringType) + assert(inferSchema.inferField(DoubleType, "2015-08-20 14:10") == StringType) + assert(inferSchema.inferField(LongType, "2015-08 14:49:00") == StringType) } test("Boolean fields types are inferred correctly from other types") { val options = new CSVOptions(Map.empty[String, String], false, "GMT") - assert(CSVInferSchema.inferField(LongType, "Fale", options) == StringType) - assert(CSVInferSchema.inferField(DoubleType, "TRUEe", options) == StringType) + val inferSchema = new CSVInferSchema(options) + + assert(inferSchema.inferField(LongType, "Fale") == StringType) + assert(inferSchema.inferField(DoubleType, "TRUEe") == StringType) } test("Type arrays are merged to highest common type") { + val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val inferSchema = new CSVInferSchema(options) + assert( - CSVInferSchema.mergeRowTypes(Array(StringType), + inferSchema.mergeRowTypes(Array(StringType), Array(DoubleType)).deep == Array(StringType).deep) assert( - CSVInferSchema.mergeRowTypes(Array(IntegerType), + inferSchema.mergeRowTypes(Array(IntegerType), Array(LongType)).deep == Array(LongType).deep) assert( - CSVInferSchema.mergeRowTypes(Array(DoubleType), + inferSchema.mergeRowTypes(Array(DoubleType), Array(LongType)).deep == Array(DoubleType).deep) } test("Null fields are handled properly when a nullValue is specified") { var options = new CSVOptions(Map("nullValue" -> "null"), false, "GMT") - assert(CSVInferSchema.inferField(NullType, "null", options) == NullType) - assert(CSVInferSchema.inferField(StringType, "null", 
options) == StringType) - assert(CSVInferSchema.inferField(LongType, "null", options) == LongType) + var inferSchema = new CSVInferSchema(options) + + assert(inferSchema.inferField(NullType, "null") == NullType) + assert(inferSchema.inferField(StringType, "null") == StringType) + assert(inferSchema.inferField(LongType, "null") == LongType) options = new CSVOptions(Map("nullValue" -> "\\N"), false, "GMT") - assert(CSVInferSchema.inferField(IntegerType, "\\N", options) == IntegerType) - assert(CSVInferSchema.inferField(DoubleType, "\\N", options) == DoubleType) - assert(CSVInferSchema.inferField(TimestampType, "\\N", options) == TimestampType) - assert(CSVInferSchema.inferField(BooleanType, "\\N", options) == BooleanType) - assert(CSVInferSchema.inferField(DecimalType(1, 1), "\\N", options) == DecimalType(1, 1)) + inferSchema = new CSVInferSchema(options) + + assert(inferSchema.inferField(IntegerType, "\\N") == IntegerType) + assert(inferSchema.inferField(DoubleType, "\\N") == DoubleType) + assert(inferSchema.inferField(TimestampType, "\\N") == TimestampType) + assert(inferSchema.inferField(BooleanType, "\\N") == BooleanType) + assert(inferSchema.inferField(DecimalType(1, 1), "\\N") == DecimalType(1, 1)) } test("Merging Nulltypes should yield Nulltype.") { - val mergedNullTypes = CSVInferSchema.mergeRowTypes(Array(NullType), Array(NullType)) + val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val inferSchema = new CSVInferSchema(options) + + val mergedNullTypes = inferSchema.mergeRowTypes(Array(NullType), Array(NullType)) assert(mergedNullTypes.deep == Array(NullType).deep) } test("SPARK-18433: Improve DataSource option keys to be more case-insensitive") { val options = new CSVOptions(Map("TiMeStampFormat" -> "yyyy-mm"), false, "GMT") - assert(CSVInferSchema.inferField(TimestampType, "2015-08", options) == TimestampType) + val inferSchema = new CSVInferSchema(options) + + assert(inferSchema.inferField(TimestampType, "2015-08") == TimestampType) } test("SPARK-18877: `inferField` on DecimalType should find a common type with `typeSoFar`") { val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val inferSchema = new CSVInferSchema(options) // 9.03E+12 is Decimal(3, -10) and 1.19E+11 is Decimal(3, -9). - assert(CSVInferSchema.inferField(DecimalType(3, -10), "1.19E+11", options) == + assert(inferSchema.inferField(DecimalType(3, -10), "1.19E11") == DecimalType(4, -9)) // BigDecimal("12345678901234567890.01234567890123456789") is precision 40 and scale 20. 
val value = "12345678901234567890.01234567890123456789" - assert(CSVInferSchema.inferField(DecimalType(3, -10), value, options) == DoubleType) + assert(inferSchema.inferField(DecimalType(3, -10), value) == DoubleType) // Seq(s"${Long.MaxValue}1", "2015-12-01 00:00:00") should be StringType - assert(CSVInferSchema.inferField(NullType, s"${Long.MaxValue}1", options) == DecimalType(20, 0)) - assert(CSVInferSchema.inferField(DecimalType(20, 0), "2015-12-01 00:00:00", options) + assert(inferSchema.inferField(NullType, s"${Long.MaxValue}1") == DecimalType(20, 0)) + assert(inferSchema.inferField(DecimalType(20, 0), "2015-12-01 00:00:00") == StringType) } test("DoubleType should be inferred when user defined nan/inf are provided") { val options = new CSVOptions(Map("nanValue" -> "nan", "negativeInf" -> "-inf", "positiveInf" -> "inf"), false, "GMT") - assert(CSVInferSchema.inferField(NullType, "nan", options) == DoubleType) - assert(CSVInferSchema.inferField(NullType, "inf", options) == DoubleType) - assert(CSVInferSchema.inferField(NullType, "-inf", options) == DoubleType) + val inferSchema = new CSVInferSchema(options) + + assert(inferSchema.inferField(NullType, "nan") == DoubleType) + assert(inferSchema.inferField(NullType, "inf") == DoubleType) + assert(inferSchema.inferField(NullType, "-inf") == DoubleType) + } + + test("inferring the decimal type using locale") { + def checkDecimalInfer(langTag: String, expectedType: DataType): Unit = { + val options = new CSVOptions( + parameters = Map("locale" -> langTag, "inferSchema" -> "true", "sep" -> "|"), + columnPruning = false, + defaultTimeZoneId = "GMT") + val inferSchema = new CSVInferSchema(options) + + val df = new DecimalFormat("", new DecimalFormatSymbols(Locale.forLanguageTag(langTag))) + val input = df.format(Decimal(1000001).toBigDecimal) + + assert(inferSchema.inferField(NullType, input) == expectedType) + } + + Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalInfer(_, DecimalType(7, 0))) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala index e4e7dc2e8c0e..7212402ef5cf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala @@ -18,13 +18,17 @@ package org.apache.spark.sql.catalyst.csv import java.math.BigDecimal +import java.text.{DecimalFormat, DecimalFormatSymbols} +import java.util.Locale import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String -class UnivocityParserSuite extends SparkFunSuite { +class UnivocityParserSuite extends SparkFunSuite with SQLHelper { private val parser = new UnivocityParser( StructType(Seq.empty), new CSVOptions(Map.empty[String, String], false, "GMT")) @@ -196,4 +200,20 @@ class UnivocityParserSuite extends SparkFunSuite { assert(doubleVal2 == Double.PositiveInfinity) } + test("parse decimals using locale") { + def checkDecimalParsing(langTag: String): Unit = { + val decimalVal = new BigDecimal("1000.001") + val decimalType = new DecimalType(10, 5) + val expected = Decimal(decimalVal, decimalType.precision, decimalType.scale) + val df = new DecimalFormat("", new 
DecimalFormatSymbols(Locale.forLanguageTag(langTag))) + val input = df.format(expected.toBigDecimal) + + val options = new CSVOptions(Map("locale" -> langTag), false, "GMT") + val parser = new UnivocityParser(new StructType().add("d", decimalType), options) + + assert(parser.makeConverter("_1", decimalType, options = options).apply(input) === expected) + } + + Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalParsing) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala index b35b8851918b..b46dfb94c133 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala @@ -135,7 +135,7 @@ object TextInputCSVDataSource extends CSVDataSource { val parser = new CsvParser(parsedOptions.asParserSettings) linesWithoutHeader.map(parser.parseLine) } - CSVInferSchema.infer(tokenRDD, header, parsedOptions) + new CSVInferSchema(parsedOptions).infer(tokenRDD, header) case _ => // If the first line could not be read, just return the empty schema. StructType(Nil) @@ -208,7 +208,7 @@ object MultiLineCSVDataSource extends CSVDataSource { encoding = parsedOptions.charset) } val sampled = CSVUtils.sample(tokenRDD, parsedOptions) - CSVInferSchema.infer(sampled, header, parsedOptions) + new CSVInferSchema(parsedOptions).infer(sampled, header) case None => // If the first row could not be read, just return the empty schema. StructType(Nil) From 0166c7373eee2654c49c210927e4e290d103f24f Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Thu, 29 Nov 2018 18:00:47 -0800 Subject: [PATCH 0026/1072] [SPARK-25501][SS] Add kafka delegation token support. ## What changes were proposed in this pull request? It adds kafka delegation token support for structured streaming. Please see the relevant [SPIP](https://docs.google.com/document/d/1ouRayzaJf_N5VQtGhVq9FURXVmRpXzEEWYHob0ne3NY/edit?usp=sharing) What this PR contains: * Configuration parameters for the feature * Delegation token fetching from broker * Usage of token through dynamic JAAS configuration * Minor refactoring in the existing code What this PR doesn't contain: * Documentation changes because design can change ## How was this patch tested? Existing tests + added small amount of additional unit tests. Because it's an external service integration mainly tested on cluster. * 4 node cluster * Kafka broker version 1.1.0 * Topic with 4 partitions * security.protocol = SASL_SSL * sasl.mechanism = SCRAM-SHA-256 An example of obtaining a token: ``` 18/10/01 01:07:49 INFO kafka010.TokenUtil: TOKENID HMAC OWNER RENEWERS ISSUEDATE EXPIRYDATE MAXDATE 18/10/01 01:07:49 INFO kafka010.TokenUtil: D1-v__Q5T_uHx55rW16Jwg [hidden] User:user [] 2018-10-01T01:07 2018-10-02T01:07 2018-10-08T01:07 18/10/01 01:07:49 INFO security.KafkaDelegationTokenProvider: Get token from Kafka: Kind: KAFKA_DELEGATION_TOKEN, Service: kafka.server.delegation.token, Ident: 44 31 2d 76 5f 5f 51 35 54 5f 75 48 78 35 35 72 57 31 36 4a 77 67 ``` An example token usage: ``` 18/10/01 01:08:07 INFO kafka010.KafkaSecurityHelper: Scram JAAS params: org.apache.kafka.common.security.scram.ScramLoginModule required tokenauth=true serviceName="kafka" username="D1-v__Q5T_uHx55rW16Jwg" password="[hidden]"; 18/10/01 01:08:07 INFO kafka010.KafkaSourceProvider: Delegation token detected, using it for login. 
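#
# A minimal configuration sketch (not part of the cluster test above) that would make
# Spark attempt the token fetch shown in these logs. It uses only the configuration
# keys introduced by this patch; the broker addresses, keytab path and principal are
# placeholders.
#
# spark-submit \
#   --keytab /path/to/user.keytab --principal user@EXAMPLE.COM \
#   --conf spark.kafka.bootstrap.servers=broker1:9093,broker2:9093 \
#   --conf spark.kafka.security.protocol=SASL_SSL \
#   --conf spark.kafka.sasl.kerberos.service.name=kafka \
#   ... <application jar and arguments>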
``` Closes #22598 from gaborgsomogyi/SPARK-25501. Authored-by: Gabor Somogyi Signed-off-by: Marcelo Vanzin --- core/pom.xml | 13 + .../HadoopDelegationTokenManager.scala | 3 +- .../KafkaDelegationTokenProvider.scala | 61 +++++ .../deploy/security/KafkaTokenUtil.scala | 202 +++++++++++++++ .../apache/spark/internal/config/Kafka.scala | 82 ++++++ .../HadoopDelegationTokenManagerSuite.scala | 5 +- .../deploy/security/KafkaTokenUtilSuite.scala | 239 ++++++++++++++++++ external/kafka-0-10-sql/pom.xml | 2 - .../sql/kafka010/KafkaSecurityHelper.scala | 56 ++++ .../sql/kafka010/KafkaSourceProvider.scala | 82 +++--- .../kafka010/KafkaStreamingWriteSupport.scala | 22 +- .../kafka010/KafkaContinuousSinkSuite.scala | 4 +- .../kafka010/KafkaSecurityHelperSuite.scala | 100 ++++++++ external/kafka-0-10/pom.xml | 2 - pom.xml | 2 + 15 files changed, 825 insertions(+), 50 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala create mode 100644 core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala create mode 100644 core/src/main/scala/org/apache/spark/internal/config/Kafka.scala create mode 100644 core/src/test/scala/org/apache/spark/deploy/security/KafkaTokenUtilSuite.scala create mode 100644 external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelper.scala create mode 100644 external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala diff --git a/core/pom.xml b/core/pom.xml index 36d93212ba9f..49b1a54e3259 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -408,6 +408,19 @@ provided + + + org.apache.kafka + kafka-clients + ${kafka.version} + provided + + target/scala-${scala.binary.version}/classes diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala index 1169b2878e99..126a6ab80136 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala @@ -274,7 +274,8 @@ private[spark] class HadoopDelegationTokenManager( new HadoopFSDelegationTokenProvider( () => HadoopDelegationTokenManager.this.fileSystemsToAccess())) ++ safeCreateProvider(new HiveDelegationTokenProvider) ++ - safeCreateProvider(new HBaseDelegationTokenProvider) + safeCreateProvider(new HBaseDelegationTokenProvider) ++ + safeCreateProvider(new KafkaDelegationTokenProvider) // Filter out providers for which spark.security.credentials.{service}.enabled is false. providers diff --git a/core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala new file mode 100644 index 000000000000..45995be630cc --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.security + +import scala.language.existentials +import scala.util.control.NonFatal + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.security.Credentials +import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, SASL_SSL, SSL} + +import org.apache.spark.SparkConf +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ + +private[security] class KafkaDelegationTokenProvider + extends HadoopDelegationTokenProvider with Logging { + + override def serviceName: String = "kafka" + + override def obtainDelegationTokens( + hadoopConf: Configuration, + sparkConf: SparkConf, + creds: Credentials): Option[Long] = { + try { + logDebug("Attempting to fetch Kafka security token.") + val (token, nextRenewalDate) = KafkaTokenUtil.obtainToken(sparkConf) + creds.addToken(token.getService, token) + return Some(nextRenewalDate) + } catch { + case NonFatal(e) => + logInfo(s"Failed to get token from service $serviceName", e) + } + None + } + + override def delegationTokensRequired( + sparkConf: SparkConf, + hadoopConf: Configuration): Boolean = { + val protocol = sparkConf.get(Kafka.SECURITY_PROTOCOL) + sparkConf.contains(Kafka.BOOTSTRAP_SERVERS) && + (protocol == SASL_SSL.name || + protocol == SSL.name || + protocol == SASL_PLAINTEXT.name) + } +} diff --git a/core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala b/core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala new file mode 100644 index 000000000000..c890cee59ffe --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.security + +import java.{ util => ju } +import java.text.SimpleDateFormat + +import scala.util.control.NonFatal + +import org.apache.hadoop.io.Text +import org.apache.hadoop.security.token.{Token, TokenIdentifier} +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier +import org.apache.kafka.clients.CommonClientConfigs +import org.apache.kafka.clients.admin.{AdminClient, CreateDelegationTokenOptions} +import org.apache.kafka.common.config.SaslConfigs +import org.apache.kafka.common.security.JaasContext +import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, SASL_SSL, SSL} +import org.apache.kafka.common.security.token.delegation.DelegationToken + +import org.apache.spark.SparkConf +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ + +private[spark] object KafkaTokenUtil extends Logging { + val TOKEN_KIND = new Text("KAFKA_DELEGATION_TOKEN") + val TOKEN_SERVICE = new Text("kafka.server.delegation.token") + + private[spark] class KafkaDelegationTokenIdentifier extends AbstractDelegationTokenIdentifier { + override def getKind: Text = TOKEN_KIND + } + + private[security] def obtainToken(sparkConf: SparkConf): (Token[_ <: TokenIdentifier], Long) = { + val adminClient = AdminClient.create(createAdminClientProperties(sparkConf)) + val createDelegationTokenOptions = new CreateDelegationTokenOptions() + val createResult = adminClient.createDelegationToken(createDelegationTokenOptions) + val token = createResult.delegationToken().get() + printToken(token) + + (new Token[KafkaDelegationTokenIdentifier]( + token.tokenInfo.tokenId.getBytes, + token.hmacAsBase64String.getBytes, + TOKEN_KIND, + TOKEN_SERVICE + ), token.tokenInfo.expiryTimestamp) + } + + private[security] def createAdminClientProperties(sparkConf: SparkConf): ju.Properties = { + val adminClientProperties = new ju.Properties + + val bootstrapServers = sparkConf.get(Kafka.BOOTSTRAP_SERVERS) + require(bootstrapServers.nonEmpty, s"Tried to obtain kafka delegation token but bootstrap " + + "servers not configured.") + adminClientProperties.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers.get) + + val protocol = sparkConf.get(Kafka.SECURITY_PROTOCOL) + adminClientProperties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, protocol) + protocol match { + case SASL_SSL.name => + setTrustStoreProperties(sparkConf, adminClientProperties) + + case SSL.name => + setTrustStoreProperties(sparkConf, adminClientProperties) + setKeyStoreProperties(sparkConf, adminClientProperties) + logWarning("Obtaining kafka delegation token with SSL protocol. Please " + + "configure 2-way authentication on the broker side.") + + case SASL_PLAINTEXT.name => + logWarning("Obtaining kafka delegation token through plain communication channel. Please " + + "consider the security impact.") + } + + // There are multiple possibilities to log in and applied in the following order: + // - JVM global security provided -> try to log in with JVM global security configuration + // which can be configured for example with 'java.security.auth.login.config'. + // For this no additional parameter needed. + // - Keytab is provided -> try to log in with kerberos module and keytab using kafka's dynamic + // JAAS configuration. + // - Keytab not provided -> try to log in with kerberos module and ticket cache using kafka's + // dynamic JAAS configuration. 
+ // Kafka client is unable to use subject from JVM which already logged in + // to kdc (see KAFKA-7677) + if (isGlobalJaasConfigurationProvided) { + logDebug("JVM global security configuration detected, using it for login.") + } else { + adminClientProperties.put(SaslConfigs.SASL_MECHANISM, SaslConfigs.GSSAPI_MECHANISM) + if (sparkConf.contains(KEYTAB)) { + logDebug("Keytab detected, using it for login.") + val jaasParams = getKeytabJaasParams(sparkConf) + adminClientProperties.put(SaslConfigs.SASL_JAAS_CONFIG, jaasParams) + } else { + logDebug("Using ticket cache for login.") + val jaasParams = getTicketCacheJaasParams(sparkConf) + adminClientProperties.put(SaslConfigs.SASL_JAAS_CONFIG, jaasParams) + } + } + + adminClientProperties + } + + def isGlobalJaasConfigurationProvided: Boolean = { + try { + JaasContext.loadClientContext(ju.Collections.emptyMap[String, Object]()) + true + } catch { + case NonFatal(_) => false + } + } + + private def setTrustStoreProperties(sparkConf: SparkConf, properties: ju.Properties): Unit = { + sparkConf.get(Kafka.TRUSTSTORE_LOCATION).foreach { truststoreLocation => + properties.put("ssl.truststore.location", truststoreLocation) + } + sparkConf.get(Kafka.TRUSTSTORE_PASSWORD).foreach { truststorePassword => + properties.put("ssl.truststore.password", truststorePassword) + } + } + + private def setKeyStoreProperties(sparkConf: SparkConf, properties: ju.Properties): Unit = { + sparkConf.get(Kafka.KEYSTORE_LOCATION).foreach { keystoreLocation => + properties.put("ssl.keystore.location", keystoreLocation) + } + sparkConf.get(Kafka.KEYSTORE_PASSWORD).foreach { keystorePassword => + properties.put("ssl.keystore.password", keystorePassword) + } + sparkConf.get(Kafka.KEY_PASSWORD).foreach { keyPassword => + properties.put("ssl.key.password", keyPassword) + } + } + + private[security] def getKeytabJaasParams(sparkConf: SparkConf): String = { + val serviceName = sparkConf.get(Kafka.KERBEROS_SERVICE_NAME) + require(serviceName.nonEmpty, "Kerberos service name must be defined") + + val params = + s""" + |${getKrb5LoginModuleName} required + | useKeyTab=true + | serviceName="${serviceName.get}" + | keyTab="${sparkConf.get(KEYTAB).get}" + | principal="${sparkConf.get(PRINCIPAL).get}"; + """.stripMargin.replace("\n", "") + logDebug(s"Krb keytab JAAS params: $params") + params + } + + def getTicketCacheJaasParams(sparkConf: SparkConf): String = { + val serviceName = sparkConf.get(Kafka.KERBEROS_SERVICE_NAME) + require(serviceName.nonEmpty, "Kerberos service name must be defined") + + val params = + s""" + |${getKrb5LoginModuleName} required + | useTicketCache=true + | serviceName="${serviceName.get}"; + """.stripMargin.replace("\n", "") + logDebug(s"Krb ticket cache JAAS params: $params") + params + } + + /** + * Krb5LoginModule package vary in different JVMs. + * Please see Hadoop UserGroupInformation for further details. 
+ */ + private def getKrb5LoginModuleName(): String = { + if (System.getProperty("java.vendor").contains("IBM")) { + "com.ibm.security.auth.module.Krb5LoginModule" + } else { + "com.sun.security.auth.module.Krb5LoginModule" + } + } + + private def printToken(token: DelegationToken): Unit = { + if (log.isDebugEnabled) { + val dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm") + logDebug("%-15s %-30s %-15s %-25s %-15s %-15s %-15s".format( + "TOKENID", "HMAC", "OWNER", "RENEWERS", "ISSUEDATE", "EXPIRYDATE", "MAXDATE")) + val tokenInfo = token.tokenInfo + logDebug("%-15s [hidden] %-15s %-25s %-15s %-15s %-15s".format( + tokenInfo.tokenId, + tokenInfo.owner, + tokenInfo.renewersAsString, + dateFormat.format(tokenInfo.issueTimestamp), + dateFormat.format(tokenInfo.expiryTimestamp), + dateFormat.format(tokenInfo.maxTimestamp))) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala b/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala new file mode 100644 index 000000000000..85d74c27142a --- /dev/null +++ b/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.internal.config + +private[spark] object Kafka { + + val BOOTSTRAP_SERVERS = + ConfigBuilder("spark.kafka.bootstrap.servers") + .doc("A list of coma separated host/port pairs to use for establishing the initial " + + "connection to the Kafka cluster. For further details please see kafka documentation. " + + "Only used to obtain delegation token.") + .stringConf + .createOptional + + val SECURITY_PROTOCOL = + ConfigBuilder("spark.kafka.security.protocol") + .doc("Protocol used to communicate with brokers. For further details please see kafka " + + "documentation. Only used to obtain delegation token.") + .stringConf + .createWithDefault("SASL_SSL") + + val KERBEROS_SERVICE_NAME = + ConfigBuilder("spark.kafka.sasl.kerberos.service.name") + .doc("The Kerberos principal name that Kafka runs as. This can be defined either in " + + "Kafka's JAAS config or in Kafka's config. For further details please see kafka " + + "documentation. Only used to obtain delegation token.") + .stringConf + .createOptional + + val TRUSTSTORE_LOCATION = + ConfigBuilder("spark.kafka.ssl.truststore.location") + .doc("The location of the trust store file. For further details please see kafka " + + "documentation. Only used to obtain delegation token.") + .stringConf + .createOptional + + val TRUSTSTORE_PASSWORD = + ConfigBuilder("spark.kafka.ssl.truststore.password") + .doc("The store password for the trust store file. This is optional for client and only " + + "needed if ssl.truststore.location is configured. For further details please see kafka " + + "documentation. 
Only used to obtain delegation token.") + .stringConf + .createOptional + + val KEYSTORE_LOCATION = + ConfigBuilder("spark.kafka.ssl.keystore.location") + .doc("The location of the key store file. This is optional for client and can be used for " + + "two-way authentication for client. For further details please see kafka documentation. " + + "Only used to obtain delegation token.") + .stringConf + .createOptional + + val KEYSTORE_PASSWORD = + ConfigBuilder("spark.kafka.ssl.keystore.password") + .doc("The store password for the key store file. This is optional for client and only " + + "needed if ssl.keystore.location is configured. For further details please see kafka " + + "documentation. Only used to obtain delegation token.") + .stringConf + .createOptional + + val KEY_PASSWORD = + ConfigBuilder("spark.kafka.ssl.key.password") + .doc("The password of the private key in the key store file. This is optional for client. " + + "For further details please see kafka documentation. Only used to obtain delegation token.") + .stringConf + .createOptional +} diff --git a/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala index e0e630e3be63..def9e626a2df 100644 --- a/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.deploy.security import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.FileSystem -import org.apache.hadoop.security.Credentials import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.util.Utils @@ -33,6 +31,7 @@ class HadoopDelegationTokenManagerSuite extends SparkFunSuite { assert(manager.isProviderLoaded("hadoopfs")) assert(manager.isProviderLoaded("hbase")) assert(manager.isProviderLoaded("hive")) + assert(manager.isProviderLoaded("kafka")) } test("disable hive credential provider") { @@ -41,6 +40,7 @@ class HadoopDelegationTokenManagerSuite extends SparkFunSuite { assert(manager.isProviderLoaded("hadoopfs")) assert(manager.isProviderLoaded("hbase")) assert(!manager.isProviderLoaded("hive")) + assert(manager.isProviderLoaded("kafka")) } test("using deprecated configurations") { @@ -51,6 +51,7 @@ class HadoopDelegationTokenManagerSuite extends SparkFunSuite { assert(!manager.isProviderLoaded("hadoopfs")) assert(manager.isProviderLoaded("hbase")) assert(!manager.isProviderLoaded("hive")) + assert(manager.isProviderLoaded("kafka")) } test("SPARK-23209: obtain tokens when Hive classes are not available") { diff --git a/core/src/test/scala/org/apache/spark/deploy/security/KafkaTokenUtilSuite.scala b/core/src/test/scala/org/apache/spark/deploy/security/KafkaTokenUtilSuite.scala new file mode 100644 index 000000000000..682bebde916f --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/security/KafkaTokenUtilSuite.scala @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.security + +import java.{ util => ju } +import javax.security.auth.login.{AppConfigurationEntry, Configuration} + +import org.apache.kafka.clients.CommonClientConfigs +import org.apache.kafka.common.config.SaslConfigs +import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, SASL_SSL, SSL} +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.internal.config._ + +class KafkaTokenUtilSuite extends SparkFunSuite with BeforeAndAfterEach { + private val bootStrapServers = "127.0.0.1:0" + private val trustStoreLocation = "/path/to/trustStore" + private val trustStorePassword = "trustStoreSecret" + private val keyStoreLocation = "/path/to/keyStore" + private val keyStorePassword = "keyStoreSecret" + private val keyPassword = "keySecret" + private val keytab = "/path/to/keytab" + private val kerberosServiceName = "kafka" + private val principal = "user@domain.com" + + private var sparkConf: SparkConf = null + + private class KafkaJaasConfiguration extends Configuration { + val entry = + new AppConfigurationEntry( + "DummyModule", + AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, + ju.Collections.emptyMap[String, Object]() + ) + + override def getAppConfigurationEntry(name: String): Array[AppConfigurationEntry] = { + if (name.equals("KafkaClient")) { + Array(entry) + } else { + null + } + } + } + + override def beforeEach(): Unit = { + super.beforeEach() + sparkConf = new SparkConf() + } + + override def afterEach(): Unit = { + try { + resetGlobalConfig() + } finally { + super.afterEach() + } + } + + private def setGlobalKafkaClientConfig(): Unit = { + Configuration.setConfiguration(new KafkaJaasConfiguration) + } + + private def resetGlobalConfig(): Unit = { + Configuration.setConfiguration(null) + } + + test("createAdminClientProperties without bootstrap servers should throw exception") { + val thrown = intercept[IllegalArgumentException] { + KafkaTokenUtil.createAdminClientProperties(sparkConf) + } + assert(thrown.getMessage contains + "Tried to obtain kafka delegation token but bootstrap servers not configured.") + } + + test("createAdminClientProperties with SASL_PLAINTEXT protocol should not include " + + "keystore and truststore config") { + sparkConf.set(Kafka.BOOTSTRAP_SERVERS, bootStrapServers) + sparkConf.set(Kafka.SECURITY_PROTOCOL, SASL_PLAINTEXT.name) + sparkConf.set(Kafka.TRUSTSTORE_LOCATION, trustStoreLocation) + sparkConf.set(Kafka.TRUSTSTORE_PASSWORD, trustStoreLocation) + sparkConf.set(Kafka.KEYSTORE_LOCATION, keyStoreLocation) + sparkConf.set(Kafka.KEYSTORE_PASSWORD, keyStorePassword) + sparkConf.set(Kafka.KEY_PASSWORD, keyPassword) + sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) + + val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) + + assert(adminClientProperties.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG) + === bootStrapServers) + assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) + === SASL_PLAINTEXT.name) + 
assert(!adminClientProperties.containsKey("ssl.truststore.location")) + assert(!adminClientProperties.containsKey("ssl.truststore.password")) + assert(!adminClientProperties.containsKey("ssl.keystore.location")) + assert(!adminClientProperties.containsKey("ssl.keystore.password")) + assert(!adminClientProperties.containsKey("ssl.key.password")) + } + + test("createAdminClientProperties with SASL_SSL protocol should include truststore config") { + sparkConf.set(Kafka.BOOTSTRAP_SERVERS, bootStrapServers) + sparkConf.set(Kafka.SECURITY_PROTOCOL, SASL_SSL.name) + sparkConf.set(Kafka.TRUSTSTORE_LOCATION, trustStoreLocation) + sparkConf.set(Kafka.TRUSTSTORE_PASSWORD, trustStorePassword) + sparkConf.set(Kafka.KEYSTORE_LOCATION, keyStoreLocation) + sparkConf.set(Kafka.KEYSTORE_PASSWORD, keyStorePassword) + sparkConf.set(Kafka.KEY_PASSWORD, keyPassword) + sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) + + val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) + + assert(adminClientProperties.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG) + === bootStrapServers) + assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) + === SASL_SSL.name) + assert(adminClientProperties.get("ssl.truststore.location") === trustStoreLocation) + assert(adminClientProperties.get("ssl.truststore.password") === trustStorePassword) + assert(!adminClientProperties.containsKey("ssl.keystore.location")) + assert(!adminClientProperties.containsKey("ssl.keystore.password")) + assert(!adminClientProperties.containsKey("ssl.key.password")) + } + + test("createAdminClientProperties with SSL protocol should include keystore and truststore " + + "config") { + sparkConf.set(Kafka.BOOTSTRAP_SERVERS, bootStrapServers) + sparkConf.set(Kafka.SECURITY_PROTOCOL, SSL.name) + sparkConf.set(Kafka.TRUSTSTORE_LOCATION, trustStoreLocation) + sparkConf.set(Kafka.TRUSTSTORE_PASSWORD, trustStorePassword) + sparkConf.set(Kafka.KEYSTORE_LOCATION, keyStoreLocation) + sparkConf.set(Kafka.KEYSTORE_PASSWORD, keyStorePassword) + sparkConf.set(Kafka.KEY_PASSWORD, keyPassword) + sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) + + val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) + + assert(adminClientProperties.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG) + === bootStrapServers) + assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) + === SSL.name) + assert(adminClientProperties.get("ssl.truststore.location") === trustStoreLocation) + assert(adminClientProperties.get("ssl.truststore.password") === trustStorePassword) + assert(adminClientProperties.get("ssl.keystore.location") === keyStoreLocation) + assert(adminClientProperties.get("ssl.keystore.password") === keyStorePassword) + assert(adminClientProperties.get("ssl.key.password") === keyPassword) + } + + test("createAdminClientProperties with global config should not set dynamic jaas config") { + sparkConf.set(Kafka.BOOTSTRAP_SERVERS, bootStrapServers) + sparkConf.set(Kafka.SECURITY_PROTOCOL, SASL_SSL.name) + setGlobalKafkaClientConfig() + + val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) + + assert(adminClientProperties.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG) + === bootStrapServers) + assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) + === SASL_SSL.name) + assert(!adminClientProperties.containsKey(SaslConfigs.SASL_MECHANISM)) + 
assert(!adminClientProperties.containsKey(SaslConfigs.SASL_JAAS_CONFIG)) + } + + test("createAdminClientProperties with keytab should set keytab dynamic jaas config") { + sparkConf.set(Kafka.BOOTSTRAP_SERVERS, bootStrapServers) + sparkConf.set(Kafka.SECURITY_PROTOCOL, SASL_SSL.name) + sparkConf.set(KEYTAB, keytab) + sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) + sparkConf.set(PRINCIPAL, principal) + + val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) + + assert(adminClientProperties.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG) + === bootStrapServers) + assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) + === SASL_SSL.name) + assert(adminClientProperties.containsKey(SaslConfigs.SASL_MECHANISM)) + val saslJaasConfig = adminClientProperties.getProperty(SaslConfigs.SASL_JAAS_CONFIG) + assert(saslJaasConfig.contains("Krb5LoginModule required")) + assert(saslJaasConfig.contains("useKeyTab=true")) + } + + test("createAdminClientProperties without keytab should set ticket cache dynamic jaas config") { + sparkConf.set(Kafka.BOOTSTRAP_SERVERS, bootStrapServers) + sparkConf.set(Kafka.SECURITY_PROTOCOL, SASL_SSL.name) + sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) + + val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) + + assert(adminClientProperties.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG) + === bootStrapServers) + assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) + === SASL_SSL.name) + assert(adminClientProperties.containsKey(SaslConfigs.SASL_MECHANISM)) + val saslJaasConfig = adminClientProperties.getProperty(SaslConfigs.SASL_JAAS_CONFIG) + assert(saslJaasConfig.contains("Krb5LoginModule required")) + assert(saslJaasConfig.contains("useTicketCache=true")) + } + + test("isGlobalJaasConfigurationProvided without global config should return false") { + assert(!KafkaTokenUtil.isGlobalJaasConfigurationProvided) + } + + test("isGlobalJaasConfigurationProvided with global config should return false") { + setGlobalKafkaClientConfig() + + assert(KafkaTokenUtil.isGlobalJaasConfigurationProvided) + } + + test("getKeytabJaasParams with keytab no service should throw exception") { + sparkConf.set(KEYTAB, keytab) + + val thrown = intercept[IllegalArgumentException] { + KafkaTokenUtil.getKeytabJaasParams(sparkConf) + } + + assert(thrown.getMessage contains "Kerberos service name must be defined") + } + + test("getTicketCacheJaasParams without service should throw exception") { + val thrown = intercept[IllegalArgumentException] { + KafkaTokenUtil.getTicketCacheJaasParams(sparkConf) + } + + assert(thrown.getMessage contains "Kerberos service name must be defined") + } +} diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 1af407167597..de8731c4b774 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -29,8 +29,6 @@ spark-sql-kafka-0-10_2.12 sql-kafka-0-10 - - 2.1.0 jar Kafka 0.10+ Source for Structured Streaming diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelper.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelper.scala new file mode 100644 index 000000000000..74d5ef9c05f1 --- /dev/null +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelper.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.kafka010 + +import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.security.token.{Token, TokenIdentifier} +import org.apache.kafka.common.security.scram.ScramLoginModule + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.security.KafkaTokenUtil +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ + +private[kafka010] object KafkaSecurityHelper extends Logging { + def isTokenAvailable(): Boolean = { + UserGroupInformation.getCurrentUser().getCredentials.getToken( + KafkaTokenUtil.TOKEN_SERVICE) != null + } + + def getTokenJaasParams(sparkConf: SparkConf): String = { + val token = UserGroupInformation.getCurrentUser().getCredentials.getToken( + KafkaTokenUtil.TOKEN_SERVICE) + val serviceName = sparkConf.get(Kafka.KERBEROS_SERVICE_NAME) + require(serviceName.isDefined, "Kerberos service name must be defined") + val username = new String(token.getIdentifier) + val password = new String(token.getPassword) + + val loginModuleName = classOf[ScramLoginModule].getName + val params = + s""" + |$loginModuleName required + | tokenauth=true + | serviceName="${serviceName.get}" + | username="$username" + | password="$password"; + """.stripMargin.replace("\n", "") + logDebug(s"Scram JAAS params: ${params.replaceAll("password=\".*\"", "password=\"[hidden]\"")}") + + params + } +} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index f770f0c2a04c..0ac330435e5c 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -18,16 +18,19 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} -import java.util.{Locale, Optional, UUID} +import java.util.{Locale, UUID} import scala.collection.JavaConverters._ import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.clients.producer.ProducerConfig +import org.apache.kafka.common.config.SaslConfigs import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer} +import org.apache.spark.SparkEnv +import org.apache.spark.deploy.security.KafkaTokenUtil import org.apache.spark.internal.Logging -import org.apache.spark.sql.{AnalysisException, DataFrame, SaveMode, SparkSession, SQLContext} +import org.apache.spark.sql.{AnalysisException, DataFrame, SaveMode, SQLContext} import org.apache.spark.sql.execution.streaming.{Sink, Source} import org.apache.spark.sql.sources._ import org.apache.spark.sql.sources.v2._ @@ -80,12 +83,7 @@ private[kafka010] class KafkaSourceProvider extends 
DataSourceRegister val uniqueGroupId = streamingUniqueGroupId(parameters, metadataPath) val caseInsensitiveParams = parameters.map { case (k, v) => (k.toLowerCase(Locale.ROOT), v) } - val specifiedKafkaParams = - parameters - .keySet - .filter(_.toLowerCase(Locale.ROOT).startsWith("kafka.")) - .map { k => k.drop(6).toString -> parameters(k) } - .toMap + val specifiedKafkaParams = convertToSpecifiedParams(parameters) val startingStreamOffsets = KafkaSourceProvider.getKafkaOffsetRangeLimit(caseInsensitiveParams, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) @@ -122,12 +120,7 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister val uniqueGroupId = streamingUniqueGroupId(parameters, metadataPath) val caseInsensitiveParams = parameters.map { case (k, v) => (k.toLowerCase(Locale.ROOT), v) } - val specifiedKafkaParams = - parameters - .keySet - .filter(_.toLowerCase(Locale.ROOT).startsWith("kafka.")) - .map { k => k.drop(6).toString -> parameters(k) } - .toMap + val specifiedKafkaParams = convertToSpecifiedParams(parameters) val startingStreamOffsets = KafkaSourceProvider.getKafkaOffsetRangeLimit(caseInsensitiveParams, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) @@ -198,12 +191,7 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister parameters: Map[String, String]): BaseRelation = { validateBatchOptions(parameters) val caseInsensitiveParams = parameters.map { case (k, v) => (k.toLowerCase(Locale.ROOT), v) } - val specifiedKafkaParams = - parameters - .keySet - .filter(_.toLowerCase(Locale.ROOT).startsWith("kafka.")) - .map { k => k.drop(6).toString -> parameters(k) } - .toMap + val specifiedKafkaParams = convertToSpecifiedParams(parameters) val startingRelationOffsets = KafkaSourceProvider.getKafkaOffsetRangeLimit( caseInsensitiveParams, STARTING_OFFSETS_OPTION_KEY, EarliestOffsetRangeLimit) @@ -230,8 +218,7 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister outputMode: OutputMode): Sink = { val defaultTopic = parameters.get(TOPIC_OPTION_KEY).map(_.trim) val specifiedKafkaParams = kafkaParamsForProducer(parameters) - new KafkaSink(sqlContext, - new ju.HashMap[String, Object](specifiedKafkaParams.asJava), defaultTopic) + new KafkaSink(sqlContext, specifiedKafkaParams, defaultTopic) } override def createRelation( @@ -248,8 +235,8 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister } val topic = parameters.get(TOPIC_OPTION_KEY).map(_.trim) val specifiedKafkaParams = kafkaParamsForProducer(parameters) - KafkaWriter.write(outerSQLContext.sparkSession, data.queryExecution, - new ju.HashMap[String, Object](specifiedKafkaParams.asJava), topic) + KafkaWriter.write(outerSQLContext.sparkSession, data.queryExecution, specifiedKafkaParams, + topic) /* This method is suppose to return a relation that reads the data that was written. * We cannot support this for Kafka. Therefore, in order to make things consistent, @@ -274,13 +261,11 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister options: DataSourceOptions): StreamingWriteSupport = { import scala.collection.JavaConverters._ - val spark = SparkSession.getActiveSession.get val topic = Option(options.get(TOPIC_OPTION_KEY).orElse(null)).map(_.trim) // We convert the options argument from V2 -> Java map -> scala mutable -> scala immutable. 
val producerParams = kafkaParamsForProducer(options.asMap.asScala.toMap) - KafkaWriter.validateQuery( - schema.toAttributes, new java.util.HashMap[String, Object](producerParams.asJava), topic) + KafkaWriter.validateQuery(schema.toAttributes, producerParams, topic) new KafkaStreamingWriteSupport(topic, producerParams, schema) } @@ -481,6 +466,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { + private val serClassName = classOf[ByteArraySerializer].getName private val deserClassName = classOf[ByteArrayDeserializer].getName def getKafkaOffsetRangeLimit( @@ -515,6 +501,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { // If buffer config is not set, set it to reasonable value to work around // buffer issues (see KAFKA-3135) .setIfUnset(ConsumerConfig.RECEIVE_BUFFER_CONFIG, 65536: java.lang.Integer) + .setTokenJaasConfigIfNeeded() .build() def kafkaParamsForExecutors( @@ -536,6 +523,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { // If buffer config is not set, set it to reasonable value to work around // buffer issues (see KAFKA-3135) .setIfUnset(ConsumerConfig.RECEIVE_BUFFER_CONFIG, 65536: java.lang.Integer) + .setTokenJaasConfigIfNeeded() .build() /** @@ -568,11 +556,32 @@ private[kafka010] object KafkaSourceProvider extends Logging { this } + def setTokenJaasConfigIfNeeded(): ConfigUpdater = { + // There are multiple possibilities to log in and applied in the following order: + // - JVM global security provided -> try to log in with JVM global security configuration + // which can be configured for example with 'java.security.auth.login.config'. + // For this no additional parameter needed. + // - Token is provided -> try to log in with scram module using kafka's dynamic JAAS + // configuration. 
+ if (KafkaTokenUtil.isGlobalJaasConfigurationProvided) { + logDebug("JVM global security configuration detected, using it for login.") + } else if (KafkaSecurityHelper.isTokenAvailable()) { + logDebug("Delegation token detected, using it for login.") + val jaasParams = KafkaSecurityHelper.getTokenJaasParams(SparkEnv.get.conf) + val mechanism = kafkaParams + .getOrElse(SaslConfigs.SASL_MECHANISM, SaslConfigs.DEFAULT_SASL_MECHANISM) + require(mechanism.startsWith("SCRAM"), + "Delegation token works only with SCRAM mechanism.") + set(SaslConfigs.SASL_JAAS_CONFIG, jaasParams) + } + this + } + def build(): ju.Map[String, Object] = map } private[kafka010] def kafkaParamsForProducer( - parameters: Map[String, String]): Map[String, String] = { + parameters: Map[String, String]): ju.Map[String, Object] = { val caseInsensitiveParams = parameters.map { case (k, v) => (k.toLowerCase(Locale.ROOT), v) } if (caseInsensitiveParams.contains(s"kafka.${ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG}")) { throw new IllegalArgumentException( @@ -580,17 +589,26 @@ private[kafka010] object KafkaSourceProvider extends Logging { + "are serialized with ByteArraySerializer.") } - if (caseInsensitiveParams.contains(s"kafka.${ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG}")) - { + if (caseInsensitiveParams.contains(s"kafka.${ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG}")) { throw new IllegalArgumentException( s"Kafka option '${ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG}' is not supported as " + "value are serialized with ByteArraySerializer.") } + + val specifiedKafkaParams = convertToSpecifiedParams(parameters) + + ConfigUpdater("executor", specifiedKafkaParams) + .set(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, serClassName) + .set(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, serClassName) + .setTokenJaasConfigIfNeeded() + .build() + } + + private def convertToSpecifiedParams(parameters: Map[String, String]): Map[String, String] = { parameters .keySet .filter(_.toLowerCase(Locale.ROOT).startsWith("kafka.")) .map { k => k.drop(6).toString -> parameters(k) } - .toMap + (ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> classOf[ByteArraySerializer].getName, - ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> classOf[ByteArraySerializer].getName) + .toMap } } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaStreamingWriteSupport.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaStreamingWriteSupport.scala index 927c56d9ce82..0d831c388460 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaStreamingWriteSupport.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaStreamingWriteSupport.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.kafka010 -import scala.collection.JavaConverters._ +import java.{util => ju} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute @@ -41,10 +41,12 @@ case object KafkaWriterCommitMessage extends WriterCommitMessage * @param schema The schema of the input data. 
*/ class KafkaStreamingWriteSupport( - topic: Option[String], producerParams: Map[String, String], schema: StructType) + topic: Option[String], + producerParams: ju.Map[String, Object], + schema: StructType) extends StreamingWriteSupport { - validateQuery(schema.toAttributes, producerParams.toMap[String, Object].asJava, topic) + validateQuery(schema.toAttributes, producerParams, topic) override def createStreamingWriterFactory(): KafkaStreamWriterFactory = KafkaStreamWriterFactory(topic, producerParams, schema) @@ -62,7 +64,9 @@ class KafkaStreamingWriteSupport( * @param schema The schema of the input data. */ case class KafkaStreamWriterFactory( - topic: Option[String], producerParams: Map[String, String], schema: StructType) + topic: Option[String], + producerParams: ju.Map[String, Object], + schema: StructType) extends StreamingDataWriterFactory { override def createWriter( @@ -83,12 +87,12 @@ case class KafkaStreamWriterFactory( * @param inputSchema The attributes in the input data. */ class KafkaStreamDataWriter( - targetTopic: Option[String], producerParams: Map[String, String], inputSchema: Seq[Attribute]) + targetTopic: Option[String], + producerParams: ju.Map[String, Object], + inputSchema: Seq[Attribute]) extends KafkaRowWriter(inputSchema, targetTopic) with DataWriter[InternalRow] { - import scala.collection.JavaConverters._ - private lazy val producer = CachedKafkaProducer.getOrCreate( - new java.util.HashMap[String, Object](producerParams.asJava)) + private lazy val producer = CachedKafkaProducer.getOrCreate(producerParams) def write(row: InternalRow): Unit = { checkForErrors() @@ -112,7 +116,7 @@ class KafkaStreamDataWriter( if (producer != null) { producer.flush() checkForErrors() - CachedKafkaProducer.close(new java.util.HashMap[String, Object](producerParams.asJava)) + CachedKafkaProducer.close(producerParams) } } } diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSinkSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSinkSuite.scala index 3f6fcf6b2e52..b21037b1340c 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSinkSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSinkSuite.scala @@ -409,7 +409,7 @@ class KafkaContinuousSinkSuite extends KafkaContinuousTest { */ val topic = newTopic() testUtils.createTopic(topic, 1) - val options = new java.util.HashMap[String, String] + val options = new java.util.HashMap[String, Object] options.put("bootstrap.servers", testUtils.brokerAddress) options.put("buffer.memory", "16384") // min buffer size options.put("block.on.buffer.full", "true") @@ -417,7 +417,7 @@ class KafkaContinuousSinkSuite extends KafkaContinuousTest { options.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer].getName) val inputSchema = Seq(AttributeReference("value", BinaryType)()) val data = new Array[Byte](15000) // large value - val writeTask = new KafkaStreamDataWriter(Some(topic), options.asScala.toMap, inputSchema) + val writeTask = new KafkaStreamDataWriter(Some(topic), options, inputSchema) try { val fieldTypes: Array[DataType] = Array(BinaryType) val converter = UnsafeProjection.create(fieldTypes) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala new file mode 100644 index 
000000000000..772fe4614bad --- /dev/null +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.kafka010 + +import java.util.UUID + +import org.apache.hadoop.security.{Credentials, UserGroupInformation} +import org.apache.hadoop.security.token.Token +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.security.KafkaTokenUtil +import org.apache.spark.deploy.security.KafkaTokenUtil.KafkaDelegationTokenIdentifier +import org.apache.spark.internal.config._ + +class KafkaSecurityHelperSuite extends SparkFunSuite with BeforeAndAfterEach { + private val keytab = "/path/to/keytab" + private val kerberosServiceName = "kafka" + private val principal = "user@domain.com" + private val tokenId = "tokenId" + UUID.randomUUID().toString + private val tokenPassword = "tokenPassword" + UUID.randomUUID().toString + + private var sparkConf: SparkConf = null + + override def beforeEach(): Unit = { + super.beforeEach() + sparkConf = new SparkConf() + } + + override def afterEach(): Unit = { + try { + resetUGI + } finally { + super.afterEach() + } + } + + private def addTokenToUGI(): Unit = { + val token = new Token[KafkaDelegationTokenIdentifier]( + tokenId.getBytes, + tokenPassword.getBytes, + KafkaTokenUtil.TOKEN_KIND, + KafkaTokenUtil.TOKEN_SERVICE + ) + val creds = new Credentials() + creds.addToken(KafkaTokenUtil.TOKEN_SERVICE, token) + UserGroupInformation.getCurrentUser.addCredentials(creds) + } + + private def resetUGI: Unit = { + UserGroupInformation.setLoginUser(null) + } + + test("isTokenAvailable without token should return false") { + assert(!KafkaSecurityHelper.isTokenAvailable()) + } + + test("isTokenAvailable with token should return true") { + addTokenToUGI() + + assert(KafkaSecurityHelper.isTokenAvailable()) + } + + test("getTokenJaasParams with token no service should throw exception") { + addTokenToUGI() + + val thrown = intercept[IllegalArgumentException] { + KafkaSecurityHelper.getTokenJaasParams(sparkConf) + } + + assert(thrown.getMessage contains "Kerberos service name must be defined") + } + + test("getTokenJaasParams with token should return scram module") { + addTokenToUGI() + sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) + + val jaasParams = KafkaSecurityHelper.getTokenJaasParams(sparkConf) + + assert(jaasParams.contains("ScramLoginModule required")) + assert(jaasParams.contains("tokenauth=true")) + assert(jaasParams.contains(tokenId)) + assert(jaasParams.contains(tokenPassword)) + } +} diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index ea18b7e03591..333572e99b1c 100644 
--- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -28,8 +28,6 @@ spark-streaming-kafka-0-10_2.12 streaming-kafka-0-10 - - 2.1.0 jar Spark Integration for Kafka 0.10 diff --git a/pom.xml b/pom.xml index 3ca2f739ce0e..dfc3c540dc18 100644 --- a/pom.xml +++ b/pom.xml @@ -128,6 +128,8 @@ 1.2.1.spark2 1.2.1 + + 2.1.0 10.12.1.1 1.10.0 1.5.3 From 66b2046462c0e93b2ca167728eba9f4d13a5a67c Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 30 Nov 2018 10:29:30 +0800 Subject: [PATCH 0027/1072] [SPARK-25446][R] Add schema_of_json() and schema_of_csv() to R ## What changes were proposed in this pull request? This PR proposes to expose `schema_of_json` and `schema_of_csv` at R side. **`schema_of_json`**: ```r json <- '{"name":"Bob"}' df <- sql("SELECT * FROM range(1)") head(select(df, schema_of_json(json))) ``` ``` schema_of_json({"name":"Bob"}) 1 struct ``` **`schema_of_csv`**: ```r csv <- "Amsterdam,2018" df <- sql("SELECT * FROM range(1)") head(select(df, schema_of_csv(csv))) ``` ``` schema_of_csv(Amsterdam,2018) 1 struct<_c0:string,_c1:int> ``` ## How was this patch tested? Manually tested, unit tests added, documentation manually built and verified. Closes #22939 from HyukjinKwon/SPARK-25446. Authored-by: hyukjinkwon Signed-off-by: hyukjinkwon --- R/pkg/NAMESPACE | 2 + R/pkg/R/functions.R | 77 ++++++++++++++++++++++++--- R/pkg/R/generics.R | 8 +++ R/pkg/tests/fulltests/test_sparkSQL.R | 16 +++++- 4 files changed, 94 insertions(+), 9 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index cdeafdd90ce4..1f8ba0bcf1cf 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -351,6 +351,8 @@ exportMethods("%<=>%", "row_number", "rpad", "rtrim", + "schema_of_csv", + "schema_of_json", "second", "sha1", "sha2", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index f72645a25779..f568a931ae1f 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -205,11 +205,18 @@ NULL #' also supported for the schema. #' \item \code{from_csv}: a DDL-formatted string #' } -#' @param ... additional argument(s). In \code{to_json}, \code{to_csv} and \code{from_json}, -#' this contains additional named properties to control how it is converted, accepts -#' the same options as the JSON/CSV data source. Additionally \code{to_json} supports -#' the "pretty" option which enables pretty JSON generation. In \code{arrays_zip}, -#' this contains additional Columns of arrays to be merged. +#' @param ... additional argument(s). +#' \itemize{ +#' \item \code{to_json}, \code{from_json} and \code{schema_of_json}: this contains +#' additional named properties to control how it is converted and accepts the +#' same options as the JSON data source. +#' \item \code{to_json}: it supports the "pretty" option which enables pretty +#' JSON generation. +#' \item \code{to_csv}, \code{from_csv} and \code{schema_of_csv}: this contains +#' additional named properties to control how it is converted and accepts the +#' same options as the CSV data source. +#' \item \code{arrays_zip}, this contains additional Columns of arrays to be merged. 
+#' } #' @name column_collection_functions #' @rdname column_collection_functions #' @family collection functions @@ -1771,12 +1778,16 @@ setMethod("to_date", #' df2 <- mutate(df2, people_json = to_json(df2$people)) #' #' # Converts a map into a JSON object -#' df2 <- sql("SELECT map('name', 'Bob')) as people") +#' df2 <- sql("SELECT map('name', 'Bob') as people") #' df2 <- mutate(df2, people_json = to_json(df2$people)) #' #' # Converts an array of maps into a JSON array #' df2 <- sql("SELECT array(map('name', 'Bob'), map('name', 'Alice')) as people") -#' df2 <- mutate(df2, people_json = to_json(df2$people))} +#' df2 <- mutate(df2, people_json = to_json(df2$people)) +#' +#' # Converts a map into a pretty JSON object +#' df2 <- sql("SELECT map('name', 'Bob') as people") +#' df2 <- mutate(df2, people_json = to_json(df2$people, pretty = TRUE))} #' @note to_json since 2.2.0 setMethod("to_json", signature(x = "Column"), function(x, ...) { @@ -2285,6 +2296,32 @@ setMethod("from_json", signature(x = "Column", schema = "characterOrstructType") column(jc) }) +#' @details +#' \code{schema_of_json}: Parses a JSON string and infers its schema in DDL format. +#' +#' @rdname column_collection_functions +#' @aliases schema_of_json schema_of_json,characterOrColumn-method +#' @examples +#' +#' \dontrun{ +#' json <- "{\"name\":\"Bob\"}" +#' df <- sql("SELECT * FROM range(1)") +#' head(select(df, schema_of_json(json)))} +#' @note schema_of_json since 3.0.0 +setMethod("schema_of_json", signature(x = "characterOrColumn"), + function(x, ...) { + if (class(x) == "character") { + col <- callJStatic("org.apache.spark.sql.functions", "lit", x) + } else { + col <- x@jc + } + options <- varargsToStrEnv(...) + jc <- callJStatic("org.apache.spark.sql.functions", + "schema_of_json", + col, options) + column(jc) + }) + #' @details #' \code{from_csv}: Parses a column containing a CSV string into a Column of \code{structType} #' with the specified \code{schema}. @@ -2315,6 +2352,32 @@ setMethod("from_csv", signature(x = "Column", schema = "characterOrColumn"), column(jc) }) +#' @details +#' \code{schema_of_csv}: Parses a CSV string and infers its schema in DDL format. +#' +#' @rdname column_collection_functions +#' @aliases schema_of_csv schema_of_csv,characterOrColumn-method +#' @examples +#' +#' \dontrun{ +#' csv <- "Amsterdam,2018" +#' df <- sql("SELECT * FROM range(1)") +#' head(select(df, schema_of_csv(csv)))} +#' @note schema_of_csv since 3.0.0 +setMethod("schema_of_csv", signature(x = "characterOrColumn"), + function(x, ...) { + if (class(x) == "character") { + col <- callJStatic("org.apache.spark.sql.functions", "lit", x) + } else { + col <- x@jc + } + options <- varargsToStrEnv(...) + jc <- callJStatic("org.apache.spark.sql.functions", + "schema_of_csv", + col, options) + column(jc) + }) + #' @details #' \code{from_utc_timestamp}: This is a common function for databases supporting TIMESTAMP WITHOUT #' TIMEZONE. This function takes a timestamp which is timezone-agnostic, and interprets it as a diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index b2ca6e62175e..9d8c24c686c7 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1206,6 +1206,14 @@ setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") }) #' @name NULL setGeneric("rtrim", function(x, trimString) { standardGeneric("rtrim") }) +#' @rdname column_collection_functions +#' @name NULL +setGeneric("schema_of_csv", function(x, ...) 
{ standardGeneric("schema_of_csv") }) + +#' @rdname column_collection_functions +#' @name NULL +setGeneric("schema_of_json", function(x, ...) { standardGeneric("schema_of_json") }) + #' @rdname column_aggregate_functions #' @name NULL setGeneric("sd", function(x, na.rm = FALSE) { standardGeneric("sd") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 77a29c9ecad8..0d5118c127f2 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1620,14 +1620,20 @@ test_that("column functions", { expect_equal(collect(select(df, bround(df$x, 0)))[[1]][1], 2) expect_equal(collect(select(df, bround(df$x, 0)))[[1]][2], 4) - # Test from_csv() + # Test from_csv(), schema_of_csv() df <- as.DataFrame(list(list("col" = "1"))) c <- collect(select(df, alias(from_csv(df$col, "a INT"), "csv"))) expect_equal(c[[1]][[1]]$a, 1) c <- collect(select(df, alias(from_csv(df$col, lit("a INT")), "csv"))) expect_equal(c[[1]][[1]]$a, 1) - # Test to_json(), from_json() + df <- as.DataFrame(list(list("col" = "1"))) + c <- collect(select(df, schema_of_csv("Amsterdam,2018"))) + expect_equal(c[[1]], "struct<_c0:string,_c1:int>") + c <- collect(select(df, schema_of_csv(lit("Amsterdam,2018")))) + expect_equal(c[[1]], "struct<_c0:string,_c1:int>") + + # Test to_json(), from_json(), schema_of_json() df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people") j <- collect(select(df, alias(to_json(df$people), "json"))) expect_equal(j[order(j$json), ][1], "[{\"name\":\"Bob\"},{\"name\":\"Alice\"}]") @@ -1654,6 +1660,12 @@ test_that("column functions", { expect_true(any(apply(s, 1, function(x) { x[[1]]$age == 16 }))) } + df <- as.DataFrame(list(list("col" = "1"))) + c <- collect(select(df, schema_of_json('{"name":"Bob"}'))) + expect_equal(c[[1]], "struct") + c <- collect(select(df, schema_of_json(lit('{"name":"Bob"}')))) + expect_equal(c[[1]], "struct") + # Test to_json() supports arrays of primitive types and arrays df <- sql("SELECT array(19, 42, 70) as age") j <- collect(select(df, alias(to_json(df$age), "json"))) From 8edb64c1b9ee49d836e171a459dd93f524df92bf Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 30 Nov 2018 11:56:25 +0800 Subject: [PATCH 0028/1072] [SPARK-26060][SQL] Track SparkConf entries and make SET command reject such entries. ## What changes were proposed in this pull request? Currently the `SET` command works without any warnings even if the specified key is for `SparkConf` entries and it has no effect because the command does not update `SparkConf`, but the behavior might confuse users. We should track `SparkConf` entries and make the command reject for such entries. ## How was this patch tested? Added a test and existing tests. Closes #23031 from ueshin/issues/SPARK-26060/set_command. 
Authored-by: Takuya UESHIN Signed-off-by: Wenchen Fan --- docs/sql-migration-guide-upgrade.md | 2 ++ .../org/apache/spark/sql/internal/SQLConf.scala | 12 +++++++++++- .../scala/org/apache/spark/sql/RuntimeConfig.scala | 4 ++++ .../org/apache/spark/sql/RuntimeConfigSuite.scala | 10 ++++++++++ .../spark/sql/execution/command/DDLSuite.scala | 8 ++++++++ 5 files changed, 35 insertions(+), 1 deletion(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 55838e773e4b..e48125a0972b 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -29,6 +29,8 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, users can create a map with duplicated keys via built-in functions like `CreateMap`, `StringToMap`, etc. The behavior of map with duplicated keys is undefined, e.g. map look up respects the duplicated key appears first, `Dataset.collect` only keeps the duplicated key appears last, `MapKeys` returns duplicated keys, etc. Since Spark 3.0, these built-in functions will remove duplicated map keys with last wins policy. Users may still read map values with duplicated keys from data sources which do not enforce it (e.g. Parquet), the behavior will be udefined. + - In Spark version 2.4 and earlier, the `SET` command works without any warnings even if the specified key is for `SparkConf` entries and it has no effect because the command does not update `SparkConf`, but the behavior might confuse users. Since 3.0, the command fails if a `SparkConf` key is used. You can disable such a check by setting `spark.sql.legacy.execution.setCommandRejectsSparkConfs` to `false`. + ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some change in behavior and are illustrated in the table below. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 7bcf21595ce5..f1c845bc9450 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -45,7 +45,7 @@ import org.apache.spark.util.Utils object SQLConf { - private val sqlConfEntries = java.util.Collections.synchronizedMap( + private[sql] val sqlConfEntries = java.util.Collections.synchronizedMap( new java.util.HashMap[String, ConfigEntry[_]]()) val staticConfKeys: java.util.Set[String] = @@ -1610,6 +1610,14 @@ object SQLConf { """ "... N more fields" placeholder.""") .intConf .createWithDefault(25) + + val SET_COMMAND_REJECTS_SPARK_CONFS = + buildConf("spark.sql.legacy.execution.setCommandRejectsSparkConfs") + .internal() + .doc("If it is set to true, SET command will fail when the key is registered as " + + "a SparkConf entry.") + .booleanConf + .createWithDefault(true) } /** @@ -2030,6 +2038,8 @@ class SQLConf extends Serializable with Logging { def maxToStringFields: Int = getConf(SQLConf.MAX_TO_STRING_FIELDS) + def setCommandRejectsSparkConfs: Boolean = getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CONFS) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala index 5a554eff02e3..d83a01ff9ea6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala @@ -153,5 +153,9 @@ class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) { if (SQLConf.staticConfKeys.contains(key)) { throw new AnalysisException(s"Cannot modify the value of a static config: $key") } + if (sqlConf.setCommandRejectsSparkConfs && + ConfigEntry.findEntry(key) != null && !SQLConf.sqlConfEntries.containsKey(key)) { + throw new AnalysisException(s"Cannot modify the value of a Spark config: $key") + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala index cdcea09ad975..6196757eb701 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql import org.apache.spark.SparkFunSuite +import org.apache.spark.internal.config class RuntimeConfigSuite extends SparkFunSuite { @@ -68,4 +69,13 @@ class RuntimeConfigSuite extends SparkFunSuite { assert(!conf.isModifiable("")) assert(!conf.isModifiable("invalid config parameter")) } + + test("reject SparkConf entries") { + val conf = newConf() + + val ex = intercept[AnalysisException] { + conf.set(config.CPUS_PER_TASK.key, 4) + } + assert(ex.getMessage.contains("Spark config")) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index f8d98dead2d4..9d32fb6d4696 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -24,6 +24,7 @@ import java.util.Locale import org.apache.hadoop.fs.Path import org.scalatest.BeforeAndAfterEach +import org.apache.spark.internal.config import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchPartitionException, NoSuchTableException, TempTableAlreadyExistsException} @@ -2715,4 +2716,11 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } } + + test("set command rejects SparkConf entries") { + val ex = intercept[AnalysisException] { + sql(s"SET ${config.CPUS_PER_TASK.key} = 4") + } + assert(ex.getMessage.contains("Spark config")) + } } From 9cfc3ee6253bed21924424ccaadea0287a6f15f4 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 30 Nov 2018 12:00:55 +0800 Subject: [PATCH 0029/1072] [SPARK-26188][SQL] FileIndex: don't infer data types of partition columns if user specifies schema ## What changes were proposed in this pull request? This PR is to fix a regression introduced in: https://github.com/apache/spark/pull/21004/files#r236998030 If user specifies schema, Spark don't need to infer data type for of partition columns, otherwise the data type might not match with the one user provided. E.g. for partition directory `p=4d`, after data type inference the column value will be `4.0`. See https://issues.apache.org/jira/browse/SPARK-26188 for more details. 
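As a quick sketch of the basic case (directory layout and column names here are hypothetical, `spark` is an active session):

```scala
// Hypothetical layout: /data/events/part=4d/<parquet files>
import org.apache.spark.sql.types.{StringType, StructType}

val schema = new StructType().add("id", StringType).add("part", StringType)

// Before this fix, the directory value "4d" was first inferred as the double 4.0 and then
// cast to the user-specified StringType, yielding "4.0"; with the fix, the user-provided
// type is used directly and the value stays "4d".
val df = spark.read.schema(schema).parquet("/data/events")
df.select("part").distinct().show()
```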
Note that user specified schema **might not cover all the data columns**: ``` val schema = new StructType() .add("id", StringType) .add("ex", ArrayType(StringType)) val df = spark.read .schema(schema) .format("parquet") .load(src.toString) assert(df.schema.toList === List( StructField("ex", ArrayType(StringType)), StructField("part", IntegerType), // inferred partitionColumn dataType StructField("id", StringType))) // used user provided partitionColumn dataType ``` For the missing columns in user specified schema, Spark still need to infer their data types if `partitionColumnTypeInferenceEnabled` is enabled. To implement the partially inference, refactor `PartitioningUtils.parsePartitions` and pass the user specified schema as parameter to cast partition values. ## How was this patch tested? Add unit test. Closes #23165 from gengliangwang/fixFileIndex. Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../PartitioningAwareFileIndex.scala | 47 ++----------------- .../datasources/PartitioningUtils.scala | 39 ++++++++++++--- .../datasources/FileIndexSuite.scala | 16 +++++++ .../ParquetPartitionDiscoverySuite.scala | 22 +++++++-- 4 files changed, 72 insertions(+), 52 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index cc8af7b92c45..7b0e4dbcc25f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -126,33 +126,15 @@ abstract class PartitioningAwareFileIndex( val caseInsensitiveOptions = CaseInsensitiveMap(parameters) val timeZoneId = caseInsensitiveOptions.get(DateTimeUtils.TIMEZONE_OPTION) .getOrElse(sparkSession.sessionState.conf.sessionLocalTimeZone) - val inferredPartitionSpec = PartitioningUtils.parsePartitions( + + val caseSensitive = sparkSession.sqlContext.conf.caseSensitiveAnalysis + PartitioningUtils.parsePartitions( leafDirs, typeInference = sparkSession.sessionState.conf.partitionColumnTypeInferenceEnabled, basePaths = basePaths, + userSpecifiedSchema = userSpecifiedSchema, + caseSensitive = caseSensitive, timeZoneId = timeZoneId) - userSpecifiedSchema match { - case Some(userProvidedSchema) if userProvidedSchema.nonEmpty => - val userPartitionSchema = - combineInferredAndUserSpecifiedPartitionSchema(inferredPartitionSpec) - - // we need to cast into the data type that user specified. - def castPartitionValuesToUserSchema(row: InternalRow) = { - InternalRow((0 until row.numFields).map { i => - val dt = inferredPartitionSpec.partitionColumns.fields(i).dataType - Cast( - Literal.create(row.get(i, dt), dt), - userPartitionSchema.fields(i).dataType, - Option(timeZoneId)).eval() - }: _*) - } - - PartitionSpec(userPartitionSchema, inferredPartitionSpec.partitions.map { part => - part.copy(values = castPartitionValuesToUserSchema(part.values)) - }) - case _ => - inferredPartitionSpec - } } private def prunePartitions( @@ -233,25 +215,6 @@ abstract class PartitioningAwareFileIndex( val name = path.getName !((name.startsWith("_") && !name.contains("=")) || name.startsWith(".")) } - - /** - * In the read path, only managed tables by Hive provide the partition columns properly when - * initializing this class. 
All other file based data sources will try to infer the partitioning, - * and then cast the inferred types to user specified dataTypes if the partition columns exist - * inside `userSpecifiedSchema`, otherwise we can hit data corruption bugs like SPARK-18510, or - * inconsistent data types as reported in SPARK-21463. - * @param spec A partition inference result - * @return The PartitionSchema resolved from inference and cast according to `userSpecifiedSchema` - */ - private def combineInferredAndUserSpecifiedPartitionSchema(spec: PartitionSpec): StructType = { - val equality = sparkSession.sessionState.conf.resolver - val resolved = spec.partitionColumns.map { partitionField => - // SPARK-18510: try to get schema from userSpecifiedSchema, otherwise fallback to inferred - userSpecifiedSchema.flatMap(_.find(f => equality(f.name, partitionField.name))).getOrElse( - partitionField) - } - StructType(resolved) - } } object PartitioningAwareFileIndex { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 3183fd30e5e0..9d2c9ba0c1a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCoercion} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} -import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -94,18 +94,34 @@ object PartitioningUtils { paths: Seq[Path], typeInference: Boolean, basePaths: Set[Path], + userSpecifiedSchema: Option[StructType], + caseSensitive: Boolean, timeZoneId: String): PartitionSpec = { - parsePartitions(paths, typeInference, basePaths, DateTimeUtils.getTimeZone(timeZoneId)) + parsePartitions(paths, typeInference, basePaths, userSpecifiedSchema, + caseSensitive, DateTimeUtils.getTimeZone(timeZoneId)) } private[datasources] def parsePartitions( paths: Seq[Path], typeInference: Boolean, basePaths: Set[Path], + userSpecifiedSchema: Option[StructType], + caseSensitive: Boolean, timeZone: TimeZone): PartitionSpec = { + val userSpecifiedDataTypes = if (userSpecifiedSchema.isDefined) { + val nameToDataType = userSpecifiedSchema.get.fields.map(f => f.name -> f.dataType).toMap + if (!caseSensitive) { + CaseInsensitiveMap(nameToDataType) + } else { + nameToDataType + } + } else { + Map.empty[String, DataType] + } + // First, we need to parse every partition's path and see if we can find partition values. val (partitionValues, optDiscoveredBasePaths) = paths.map { path => - parsePartition(path, typeInference, basePaths, timeZone) + parsePartition(path, typeInference, basePaths, userSpecifiedDataTypes, timeZone) }.unzip // We create pairs of (path -> path's partition value) here @@ -147,7 +163,7 @@ object PartitioningUtils { columnNames.zip(literals).map { case (name, Literal(_, dataType)) => // We always assume partition columns are nullable since we've no idea whether null values // will be appended in the future. 
- StructField(name, dataType, nullable = true) + StructField(name, userSpecifiedDataTypes.getOrElse(name, dataType), nullable = true) } } @@ -185,6 +201,7 @@ object PartitioningUtils { path: Path, typeInference: Boolean, basePaths: Set[Path], + userSpecifiedDataTypes: Map[String, DataType], timeZone: TimeZone): (Option[PartitionValues], Option[Path]) = { val columns = ArrayBuffer.empty[(String, Literal)] // Old Hadoop versions don't have `Path.isRoot` @@ -206,7 +223,7 @@ object PartitioningUtils { // Let's say currentPath is a path of "/table/a=1/", currentPath.getName will give us a=1. // Once we get the string, we try to parse it and find the partition column and value. val maybeColumn = - parsePartitionColumn(currentPath.getName, typeInference, timeZone) + parsePartitionColumn(currentPath.getName, typeInference, userSpecifiedDataTypes, timeZone) maybeColumn.foreach(columns += _) // Now, we determine if we should stop. @@ -239,6 +256,7 @@ object PartitioningUtils { private def parsePartitionColumn( columnSpec: String, typeInference: Boolean, + userSpecifiedDataTypes: Map[String, DataType], timeZone: TimeZone): Option[(String, Literal)] = { val equalSignIndex = columnSpec.indexOf('=') if (equalSignIndex == -1) { @@ -250,7 +268,16 @@ object PartitioningUtils { val rawColumnValue = columnSpec.drop(equalSignIndex + 1) assert(rawColumnValue.nonEmpty, s"Empty partition column value in '$columnSpec'") - val literal = inferPartitionColumnValue(rawColumnValue, typeInference, timeZone) + val literal = if (userSpecifiedDataTypes.contains(columnName)) { + // SPARK-26188: if user provides corresponding column schema, get the column value without + // inference, and then cast it as user specified data type. + val columnValue = inferPartitionColumnValue(rawColumnValue, false, timeZone) + val castedValue = + Cast(columnValue, userSpecifiedDataTypes(columnName), Option(timeZone.getID)).eval() + Literal.create(castedValue, userSpecifiedDataTypes(columnName)) + } else { + inferPartitionColumnValue(rawColumnValue, typeInference, timeZone) + } Some(columnName -> literal) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala index 49e7af4a9896..fdb0511f01a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.util.{KnownSizeEstimation, SizeEstimator} class FileIndexSuite extends SharedSQLContext { @@ -49,6 +50,21 @@ class FileIndexSuite extends SharedSQLContext { } } + test("SPARK-26188: don't infer data types of partition columns if user specifies schema") { + withTempDir { dir => + val partitionDirectory = new File(dir, s"a=4d") + partitionDirectory.mkdir() + val file = new File(partitionDirectory, "text.txt") + stringToFile(file, "text") + val path = new Path(dir.getCanonicalPath) + val schema = StructType(Seq(StructField("a", StringType, false))) + val fileIndex = new InMemoryFileIndex(spark, Seq(path), Map.empty, Some(schema)) + val partitionValues = fileIndex.partitionSpec().partitions.map(_.values) + assert(partitionValues.length == 1 
&& partitionValues(0).numFields == 1 && + partitionValues(0).getString(0) == "4d") + } + } + test("InMemoryFileIndex: input paths are converted to qualified paths") { withTempDir { dir => val file = new File(dir, "text.txt") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index 9966ed94a839..f808ca458aaa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -101,7 +101,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha "hdfs://host:9000/path/a=10.5/b=hello") var exception = intercept[AssertionError] { - parsePartitions(paths.map(new Path(_)), true, Set.empty[Path], timeZoneId) + parsePartitions(paths.map(new Path(_)), true, Set.empty[Path], None, true, timeZoneId) } assert(exception.getMessage().contains("Conflicting directory structures detected")) @@ -115,6 +115,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha paths.map(new Path(_)), true, Set(new Path("hdfs://host:9000/path/")), + None, + true, timeZoneId) // Valid @@ -128,6 +130,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha paths.map(new Path(_)), true, Set(new Path("hdfs://host:9000/path/something=true/table")), + None, + true, timeZoneId) // Valid @@ -141,6 +145,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha paths.map(new Path(_)), true, Set(new Path("hdfs://host:9000/path/table=true")), + None, + true, timeZoneId) // Invalid @@ -154,6 +160,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha paths.map(new Path(_)), true, Set(new Path("hdfs://host:9000/path/")), + None, + true, timeZoneId) } assert(exception.getMessage().contains("Conflicting directory structures detected")) @@ -174,6 +182,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha paths.map(new Path(_)), true, Set(new Path("hdfs://host:9000/tmp/tables/")), + None, + true, timeZoneId) } assert(exception.getMessage().contains("Conflicting directory structures detected")) @@ -181,13 +191,13 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha test("parse partition") { def check(path: String, expected: Option[PartitionValues]): Unit = { - val actual = parsePartition(new Path(path), true, Set.empty[Path], timeZone)._1 + val actual = parsePartition(new Path(path), true, Set.empty[Path], Map.empty, timeZone)._1 assert(expected === actual) } def checkThrows[T <: Throwable: Manifest](path: String, expected: String): Unit = { val message = intercept[T] { - parsePartition(new Path(path), true, Set.empty[Path], timeZone) + parsePartition(new Path(path), true, Set.empty[Path], Map.empty, timeZone) }.getMessage assert(message.contains(expected)) @@ -231,6 +241,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha path = new Path("file://path/a=10"), typeInference = true, basePaths = Set(new Path("file://path/a=10")), + Map.empty, timeZone = timeZone)._1 assert(partitionSpec1.isEmpty) @@ -240,6 +251,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha path = new Path("file://path/a=10"), typeInference = 
true, basePaths = Set(new Path("file://path")), + Map.empty, timeZone = timeZone)._1 assert(partitionSpec2 == @@ -258,6 +270,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha paths.map(new Path(_)), true, rootPaths, + None, + true, timeZoneId) assert(actualSpec.partitionColumns === spec.partitionColumns) assert(actualSpec.partitions.length === spec.partitions.length) @@ -370,7 +384,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha test("parse partitions with type inference disabled") { def check(paths: Seq[String], spec: PartitionSpec): Unit = { val actualSpec = - parsePartitions(paths.map(new Path(_)), false, Set.empty[Path], timeZoneId) + parsePartitions(paths.map(new Path(_)), false, Set.empty[Path], None, true, timeZoneId) assert(actualSpec === spec) } From 2b2c94a3ee89630047bcdd416a977e0d1cdb1926 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 30 Nov 2018 00:02:43 -0800 Subject: [PATCH 0030/1072] [SPARK-25528][SQL] data source v2 API refactor (batch read) ## What changes were proposed in this pull request? This is the first step of the data source v2 API refactor [proposal](https://docs.google.com/document/d/1uUmKCpWLdh9vHxP7AWJ9EgbwB_U6T3EJYNjhISGmiQg/edit?usp=sharing) It adds the new API for batch read, without removing the old APIs, as they are still needed for streaming sources. More concretely, it adds 1. `TableProvider`, works like an anonymous catalog 2. `Table`, represents a structured data set. 3. `ScanBuilder` and `Scan`, a logical represents of data source scan 4. `Batch`, a physical representation of data source batch scan. ## How was this patch tested? existing tests Closes #23086 from cloud-fan/refactor-batch. Authored-by: Wenchen Fan Signed-off-by: gatorsmile --- .../kafka010/KafkaContinuousSourceSuite.scala | 4 +- .../sql/kafka010/KafkaContinuousTest.scala | 4 +- project/MimaExcludes.scala | 48 ++-- .../sql/sources/v2/SupportsBatchRead.java | 33 +++ .../apache/spark/sql/sources/v2/Table.java | 59 ++++ .../spark/sql/sources/v2/TableProvider.java | 64 +++++ .../spark/sql/sources/v2/reader/Batch.java | 48 ++++ .../reader/OldSupportsReportPartitioning.java | 38 +++ .../reader/OldSupportsReportStatistics.java | 38 +++ .../spark/sql/sources/v2/reader/Scan.java | 68 +++++ .../sql/sources/v2/reader/ScanBuilder.java | 30 ++ .../sql/sources/v2/reader/ScanConfig.java | 4 +- .../sql/sources/v2/reader/Statistics.java | 2 +- .../v2/reader/SupportsPushDownFilters.java | 4 +- .../SupportsPushDownRequiredColumns.java | 4 +- .../v2/reader/SupportsReportPartitioning.java | 8 +- .../v2/reader/SupportsReportStatistics.java | 6 +- .../v2/reader/partitioning/Partitioning.java | 3 +- .../apache/spark/sql/DataFrameReader.scala | 36 ++- .../apache/spark/sql/DataFrameWriter.scala | 2 +- .../datasources/v2/DataSourceV2Relation.scala | 90 +++--- .../datasources/v2/DataSourceV2ScanExec.scala | 68 ++--- .../datasources/v2/DataSourceV2Strategy.scala | 34 +-- .../v2/DataSourceV2StreamingScanExec.scala | 120 ++++++++ .../streaming/ProgressReporter.scala | 4 +- .../continuous/ContinuousExecution.scala | 5 +- .../sources/v2/JavaAdvancedDataSourceV2.java | 116 ++++---- .../sources/v2/JavaColumnarDataSourceV2.java | 27 +- .../v2/JavaPartitionAwareDataSource.java | 29 +- .../v2/JavaSchemaRequiredDataSource.java | 36 ++- ...Support.java => JavaSimpleBatchTable.java} | 33 +-- .../sources/v2/JavaSimpleDataSourceV2.java | 19 +- .../sql/sources/v2/DataSourceV2Suite.scala | 260 ++++++++++-------- 
.../sources/v2/SimpleWritableDataSource.scala | 35 ++- .../continuous/ContinuousSuite.scala | 4 +- 35 files changed, 942 insertions(+), 441 deletions(-) create mode 100644 sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java create mode 100644 sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java create mode 100644 sql/core/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java create mode 100644 sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Batch.java create mode 100644 sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/OldSupportsReportPartitioning.java create mode 100644 sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/OldSupportsReportStatistics.java create mode 100644 sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Scan.java create mode 100644 sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanBuilder.java create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StreamingScanExec.scala rename sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/{JavaSimpleReadSupport.java => JavaSimpleBatchTable.java} (78%) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala index af510219a6f6..9ba066a4cdc3 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.kafka010 import org.apache.kafka.clients.producer.ProducerRecord import org.apache.spark.sql.Dataset -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExec +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2StreamingScanExec import org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger import org.apache.spark.sql.streaming.Trigger @@ -208,7 +208,7 @@ class KafkaContinuousSourceTopicDeletionSuite extends KafkaContinuousTest { eventually(timeout(streamingTimeout)) { assert( query.lastExecution.executedPlan.collectFirst { - case scan: DataSourceV2ScanExec + case scan: DataSourceV2StreamingScanExec if scan.readSupport.isInstanceOf[KafkaContinuousReadSupport] => scan.scanConfig.asInstanceOf[KafkaContinuousScanConfig] }.exists { config => diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousTest.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousTest.scala index aa21f1271b81..5549e821be75 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousTest.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousTest.scala @@ -21,7 +21,7 @@ import java.util.concurrent.atomic.AtomicInteger import org.apache.spark.SparkContext import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart} -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExec +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2StreamingScanExec import org.apache.spark.sql.execution.streaming.StreamExecution import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution import org.apache.spark.sql.streaming.Trigger @@ -47,7 +47,7 @@ trait KafkaContinuousTest 
extends KafkaSourceTest { eventually(timeout(streamingTimeout)) { assert( query.lastExecution.executedPlan.collectFirst { - case scan: DataSourceV2ScanExec + case scan: DataSourceV2StreamingScanExec if scan.readSupport.isInstanceOf[KafkaContinuousReadSupport] => scan.scanConfig.asInstanceOf[KafkaContinuousScanConfig] }.exists(_.knownPartitions.size == newCount), diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 5e97d826370f..fcef424c330f 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -197,37 +197,6 @@ object MimaExcludes { // [SPARK-23781][CORE] Merge token renewer functionality into HadoopDelegationTokenManager ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkHadoopUtil.nextCredentialRenewalTime"), - // Data Source V2 API changes - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.ContinuousReadSupport"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.ReadSupport"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.WriteSupport"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.StreamWriteSupport"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.MicroBatchReadSupport"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.reader.SupportsScanColumnarBatch"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.reader.DataSourceReader"), - ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.sources.v2.reader.SupportsPushDownRequiredColumns"), - ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.sql.sources.v2.reader.ScanConfigBuilder.build"), - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.sources.v2.reader.InputPartition.createPartitionReader"), - ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.sources.v2.reader.SupportsReportStatistics"), - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.sources.v2.reader.SupportsReportStatistics.estimateStatistics"), - ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.sql.sources.v2.reader.ReadSupport.fullSchema"), - ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.sql.sources.v2.reader.ReadSupport.planInputPartitions"), - ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.sources.v2.reader.SupportsReportPartitioning"), - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.sources.v2.reader.SupportsReportPartitioning.outputPartitioning"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.v2.reader.SupportsReportPartitioning.outputPartitioning"), - ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.sql.sources.v2.reader.ReadSupport.fullSchema"), - ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.sql.sources.v2.reader.ReadSupport.planInputPartitions"), - ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.sources.v2.reader.SupportsPushDownFilters"), - ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.sql.sources.v2.reader.ScanConfigBuilder.build"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.reader.ContinuousInputPartition"), - 
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.reader.InputPartitionReader"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.reader.streaming.ContinuousInputPartitionReader"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.reader.streaming.ContinuousReader"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReader"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.writer.DataSourceWriter"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.v2.writer.DataWriterFactory.createWriter"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.v2.writer.streaming.StreamWriter"), - // [SPARK-26133][ML] Remove deprecated OneHotEncoder and rename OneHotEncoderEstimator to OneHotEncoder ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.ml.feature.OneHotEncoderEstimator"), ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.ml.feature.OneHotEncoder"), @@ -243,7 +212,22 @@ object MimaExcludes { // [SPARK-26141] Enable custom metrics implementation in shuffle write // Following are Java private classes ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.shuffle.sort.UnsafeShuffleWriter.this"), - ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.storage.TimeTrackingOutputStream.this") + ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.storage.TimeTrackingOutputStream.this"), + + // Data Source V2 API changes + (problem: Problem) => problem match { + case MissingClassProblem(cls) => + !cls.fullName.startsWith("org.apache.spark.sql.sources.v2") + case MissingTypesProblem(newCls, _) => + !newCls.fullName.startsWith("org.apache.spark.sql.sources.v2") + case InheritedNewAbstractMethodProblem(cls, _) => + !cls.fullName.startsWith("org.apache.spark.sql.sources.v2") + case DirectMissingMethodProblem(meth) => + !meth.owner.fullName.startsWith("org.apache.spark.sql.sources.v2") + case ReversedMissingMethodProblem(meth) => + !meth.owner.fullName.startsWith("org.apache.spark.sql.sources.v2") + case _ => true + } ) // Exclude rules for 2.4.x diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java new file mode 100644 index 000000000000..0df89dbb608a --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.sources.v2; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.sources.v2.reader.Scan; +import org.apache.spark.sql.sources.v2.reader.ScanBuilder; + +/** + * An empty mix-in interface for {@link Table}, to indicate this table supports batch scan. + *

    + * If a {@link Table} implements this interface, its {@link Table#newScanBuilder(DataSourceOptions)} + * must return a {@link ScanBuilder} that builds {@link Scan} with {@link Scan#toBatch()} + * implemented. + *

    + */ +@Evolving +public interface SupportsBatchRead extends Table { } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java new file mode 100644 index 000000000000..0c65fe0f9e76 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.sources.v2.reader.Scan; +import org.apache.spark.sql.sources.v2.reader.ScanBuilder; +import org.apache.spark.sql.types.StructType; + +/** + * An interface representing a logical structured data set of a data source. For example, the + * implementation can be a directory on the file system, a topic of Kafka, or a table in the + * catalog, etc. + *

+ * This interface can mixin the following interfaces to support different operations: + * <ul> + *   <li>{@link SupportsBatchRead}: this table can be read in batch queries.</li> + * </ul>
    + */ +@Evolving +public interface Table { + + /** + * A name to identify this table. Implementations should provide a meaningful name, like the + * database and table name from catalog, or the location of files for this table. + */ + String name(); + + /** + * Returns the schema of this table. + */ + StructType schema(); + + /** + * Returns a {@link ScanBuilder} which can be used to build a {@link Scan} later. Spark will call + * this method for each data scanning query. + *

+ * The builder can take some query-specific information to do operator pushdown, and keep this + * information in the created {@link Scan}. + *

    + */ + ScanBuilder newScanBuilder(DataSourceOptions options); +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java new file mode 100644 index 000000000000..855d5efe0c69 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.sources.DataSourceRegister; +import org.apache.spark.sql.types.StructType; + +/** + * The base interface for v2 data sources which don't have a real catalog. Implementations must + * have a public, 0-arg constructor. + *

    + * The major responsibility of this interface is to return a {@link Table} for read/write. + *

    + */ +@Evolving +// TODO: do not extend `DataSourceV2`, after we finish the API refactor completely. +public interface TableProvider extends DataSourceV2 { + + /** + * Return a {@link Table} instance to do read/write with user-specified options. + * + * @param options the user-specified options that can identify a table, e.g. file path, Kafka + * topic name, etc. It's an immutable case-insensitive string-to-string map. + */ + Table getTable(DataSourceOptions options); + + /** + * Return a {@link Table} instance to do read/write with user-specified schema and options. + *

+ * By default this method throws {@link UnsupportedOperationException}; implementations should + * override this method to handle user-specified schema. + *

    + * @param options the user-specified options that can identify a table, e.g. file path, Kafka + * topic name, etc. It's an immutable case-insensitive string-to-string map. + * @param schema the user-specified schema. + * @throws UnsupportedOperationException + */ + default Table getTable(DataSourceOptions options, StructType schema) { + String name; + if (this instanceof DataSourceRegister) { + name = ((DataSourceRegister) this).shortName(); + } else { + name = this.getClass().getName(); + } + throw new UnsupportedOperationException( + name + " source does not support user-specified schema"); + } +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Batch.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Batch.java new file mode 100644 index 000000000000..bcfa1983abb8 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Batch.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.reader; + +import org.apache.spark.annotation.Evolving; + +/** + * A physical representation of a data source scan for batch queries. This interface is used to + * provide physical information, like how many partitions the scanned data has, and how to read + * records from the partitions. + */ +@Evolving +public interface Batch { + + /** + * Returns a list of {@link InputPartition input partitions}. Each {@link InputPartition} + * represents a data split that can be processed by one Spark task. The number of input + * partitions returned here is the same as the number of RDD partitions this scan outputs. + *

    + * If the {@link Scan} supports filter pushdown, this Batch is likely configured with a filter + * and is responsible for creating splits for that filter, which is not a full scan. + *

    + *

    + * This method will be called only once during a data source scan, to launch one Spark job. + *

    + */ + InputPartition[] planInputPartitions(); + + /** + * Returns a factory, which produces one {@link PartitionReader} for one {@link InputPartition}. + */ + PartitionReaderFactory createReaderFactory(); +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/OldSupportsReportPartitioning.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/OldSupportsReportPartitioning.java new file mode 100644 index 000000000000..347a465905ac --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/OldSupportsReportPartitioning.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.reader; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.sources.v2.reader.partitioning.Partitioning; + +/** + * A mix in interface for {@link BatchReadSupport}. Data sources can implement this interface to + * report data partitioning and try to avoid shuffle at Spark side. + * + * Note that, when a {@link ReadSupport} implementation creates exactly one {@link InputPartition}, + * Spark may avoid adding a shuffle even if the reader does not implement this interface. + */ +@Evolving +// TODO: remove it, after we finish the API refactor completely. +public interface OldSupportsReportPartitioning extends ReadSupport { + + /** + * Returns the output data partitioning that this reader guarantees. + */ + Partitioning outputPartitioning(ScanConfig config); +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/OldSupportsReportStatistics.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/OldSupportsReportStatistics.java new file mode 100644 index 000000000000..0d3ec17107c1 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/OldSupportsReportStatistics.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.sources.v2.reader; + +import org.apache.spark.annotation.Evolving; + +/** + * A mix in interface for {@link BatchReadSupport}. Data sources can implement this interface to + * report statistics to Spark. + * + * As of Spark 2.4, statistics are reported to the optimizer before any operator is pushed to the + * data source. Implementations that return more accurate statistics based on pushed operators will + * not improve query performance until the planner can push operators before getting stats. + */ +@Evolving +// TODO: remove it, after we finish the API refactor completely. +public interface OldSupportsReportStatistics extends ReadSupport { + + /** + * Returns the estimated statistics of this data source scan. + */ + Statistics estimateStatistics(ScanConfig config); +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Scan.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Scan.java new file mode 100644 index 000000000000..4d84fb19aa02 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Scan.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.reader; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.sources.v2.SupportsBatchRead; +import org.apache.spark.sql.sources.v2.Table; + +/** + * A logical representation of a data source scan. This interface is used to provide logical + * information, like what the actual read schema is. + *

    + * This logical representation is shared between batch scan, micro-batch streaming scan and + * continuous streaming scan. Data sources must implement the corresponding methods in this + * interface to match what the table promises to support. For example, {@link #toBatch()} must be + * implemented if the {@link Table} that creates this {@link Scan} implements + * {@link SupportsBatchRead}. + *

    + */ +@Evolving +public interface Scan { + + /** + * Returns the actual schema of this data source scan, which may be different from the physical + * schema of the underlying storage, as column pruning or other optimizations may happen. + */ + StructType readSchema(); + + /** + * A description string of this scan, which may include information like: what filters are + * configured for this scan, the values of important options such as path, etc. The + * description doesn't need to include {@link #readSchema()}, as Spark already knows it. + *

    + * By default this returns the class name of the implementation. Please override it to provide a + * meaningful description. + *

    + */ + default String description() { + return this.getClass().toString(); + } + + /** + * Returns the physical representation of this scan for a batch query. By default this method throws + * an exception; data sources must override this method to provide an implementation if the + * {@link Table} that creates this scan implements {@link SupportsBatchRead}. + * + * @throws UnsupportedOperationException + */ + default Batch toBatch() { + throw new UnsupportedOperationException("Batch scans are not supported"); + } +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanBuilder.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanBuilder.java new file mode 100644 index 000000000000..d4bc1ff97713 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanBuilder.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.reader; + +import org.apache.spark.annotation.Evolving; + +/** + * An interface for building the {@link Scan}. Implementations can mix in SupportsPushDownXYZ + * interfaces to do operator pushdown, and keep the operator pushdown result in the returned + * {@link Scan}. + */ +@Evolving +public interface ScanBuilder { + Scan build(); +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanConfig.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanConfig.java index a69872a52774..c8cff68c2ef7 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanConfig.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanConfig.java @@ -28,8 +28,8 @@ * For APIs that take a {@link ScanConfig} as input, like * {@link ReadSupport#planInputPartitions(ScanConfig)}, * {@link BatchReadSupport#createReaderFactory(ScanConfig)} and - * {@link SupportsReportStatistics#estimateStatistics(ScanConfig)}, implementations mostly need to - * cast the input {@link ScanConfig} to the concrete {@link ScanConfig} class of the data source. + * {@link OldSupportsReportStatistics#estimateStatistics(ScanConfig)}, implementations mostly need + * to cast the input {@link ScanConfig} to the concrete {@link ScanConfig} class of the data source.
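To make the Scan/ScanBuilder contract above concrete, here is a minimal Scala sketch written in the style of the test helpers added later in this patch; it is illustrative only. The class name is made up, one object plays all three roles (ScanBuilder, Scan, Batch), and RangeInputPartition/SimpleReaderFactory refer to the test-suite helpers defined further down, not to new API.

import org.apache.spark.sql.sources.v2.reader._
import org.apache.spark.sql.types.StructType

class ExampleScanBuilder extends ScanBuilder with Scan with Batch {
  // The pushdown result (none in this sketch) is kept in the returned Scan, i.e. `this`.
  override def build(): Scan = this

  // The actual read schema; a real source would return the pruned schema here.
  override def readSchema(): StructType = new StructType().add("i", "int").add("j", "int")

  // Overrides the class-name default with something human readable.
  override def description(): String = "example scan over two fixed ranges"

  // Only needed because the owning Table implements SupportsBatchRead.
  override def toBatch: Batch = this

  // Called once per scan to launch one Spark job.
  override def planInputPartitions(): Array[InputPartition] =
    Array(RangeInputPartition(0, 5), RangeInputPartition(5, 10))

  override def createReaderFactory(): PartitionReaderFactory = SimpleReaderFactory
}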
*/ @Evolving public interface ScanConfig { diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Statistics.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Statistics.java index 14776f37fed4..a0b194a41f58 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Statistics.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Statistics.java @@ -23,7 +23,7 @@ /** * An interface to represent statistics for a data source, which is returned by - * {@link SupportsReportStatistics#estimateStatistics(ScanConfig)}. + * {@link SupportsReportStatistics#estimateStatistics()}. */ @Evolving public interface Statistics { diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java index 3a89baa1b44c..296d3e47e732 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java @@ -21,11 +21,11 @@ import org.apache.spark.sql.sources.Filter; /** - * A mix-in interface for {@link ScanConfigBuilder}. Data sources can implement this interface to + * A mix-in interface for {@link ScanBuilder}. Data sources can implement this interface to * push down filters to the data source and reduce the size of the data to be read. */ @Evolving -public interface SupportsPushDownFilters extends ScanConfigBuilder { +public interface SupportsPushDownFilters extends ScanBuilder { /** * Pushes down filters, and returns filters that need to be evaluated after scanning. diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownRequiredColumns.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownRequiredColumns.java index 193476322488..60e71c5dd008 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownRequiredColumns.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownRequiredColumns.java @@ -21,12 +21,12 @@ import org.apache.spark.sql.types.StructType; /** - * A mix-in interface for {@link ScanConfigBuilder}. Data sources can implement this + * A mix-in interface for {@link ScanBuilder}. Data sources can implement this * interface to push down required columns to the data source and only read these columns during * scan to reduce the size of the data to be read. */ @Evolving -public interface SupportsPushDownRequiredColumns extends ScanConfigBuilder { +public interface SupportsPushDownRequiredColumns extends ScanBuilder { /** * Applies column pruning w.r.t. the given requiredSchema. diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportPartitioning.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportPartitioning.java index 0335c7775c2a..ba175812a88d 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportPartitioning.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportPartitioning.java @@ -21,17 +21,17 @@ import org.apache.spark.sql.sources.v2.reader.partitioning.Partitioning; /** - * A mix in interface for {@link BatchReadSupport}. Data sources can implement this interface to + * A mix in interface for {@link Batch}. 
Data sources can implement this interface to * report data partitioning and try to avoid shuffle at Spark side. * - * Note that, when a {@link ReadSupport} implementation creates exactly one {@link InputPartition}, + * Note that, when a {@link Batch} implementation creates exactly one {@link InputPartition}, * Spark may avoid adding a shuffle even if the reader does not implement this interface. */ @Evolving -public interface SupportsReportPartitioning extends ReadSupport { +public interface SupportsReportPartitioning extends Batch { /** * Returns the output data partitioning that this reader guarantees. */ - Partitioning outputPartitioning(ScanConfig config); + Partitioning outputPartitioning(); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportStatistics.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportStatistics.java index 917372cdd25b..d9f5fb64083a 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportStatistics.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportStatistics.java @@ -20,7 +20,7 @@ import org.apache.spark.annotation.Evolving; /** - * A mix in interface for {@link BatchReadSupport}. Data sources can implement this interface to + * A mix in interface for {@link Batch}. Data sources can implement this interface to * report statistics to Spark. * * As of Spark 2.4, statistics are reported to the optimizer before any operator is pushed to the @@ -28,10 +28,10 @@ * not improve query performance until the planner can push operators before getting stats. */ @Evolving -public interface SupportsReportStatistics extends ReadSupport { +public interface SupportsReportStatistics extends Batch { /** * Returns the estimated statistics of this data source scan. */ - Statistics estimateStatistics(ScanConfig config); + Statistics estimateStatistics(); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Partitioning.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Partitioning.java index c9a00262c128..c7370eb3d38a 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Partitioning.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Partitioning.java @@ -19,12 +19,11 @@ import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.sources.v2.reader.InputPartition; -import org.apache.spark.sql.sources.v2.reader.ScanConfig; import org.apache.spark.sql.sources.v2.reader.SupportsReportPartitioning; /** * An interface to represent the output data partitioning for a data source, which is returned by - * {@link SupportsReportPartitioning#outputPartitioning(ScanConfig)}. Note that this should work + * {@link SupportsReportPartitioning#outputPartitioning()}. Note that this should work * like a snapshot. Once created, it should be deterministic and always report the same number of * partitions and the same "satisfy" result for a certain distribution. 
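As a hedged illustration of the new no-argument outputPartitioning() contract, the sketch below shows a Batch that reports clustering on a column named "i". The class and column names are made up, and it assumes the Partitioning and ClusteredDistribution shapes referenced in the test changes later in this patch (numPartitions(), satisfy(Distribution), clusteredColumns).

import org.apache.spark.sql.sources.v2.reader.{Batch, InputPartition, PartitionReaderFactory, SupportsReportPartitioning}
import org.apache.spark.sql.sources.v2.reader.partitioning.{ClusteredDistribution, Distribution, Partitioning}

class ExamplePartitionedBatch(
    partitions: Array[InputPartition],
    readerFactory: PartitionReaderFactory) extends Batch with SupportsReportPartitioning {

  override def planInputPartitions(): Array[InputPartition] = partitions
  override def createReaderFactory(): PartitionReaderFactory = readerFactory

  // Promise that rows with the same value of "i" never span two partitions,
  // so Spark can skip a shuffle before grouping or joining on "i".
  override def outputPartitioning(): Partitioning = new Partitioning {
    override def numPartitions(): Int = partitions.length
    override def satisfy(distribution: Distribution): Boolean = distribution match {
      case c: ClusteredDistribution => c.clusteredColumns.contains("i")
      case _ => false
    }
  }
}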
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index da88598eed06..661fe98d8c90 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.execution.datasources.jdbc._ import org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils -import org.apache.spark.sql.sources.v2.{BatchReadSupportProvider, DataSourceOptions, DataSourceV2} +import org.apache.spark.sql.sources.v2._ import org.apache.spark.sql.types.{StringType, StructType} import org.apache.spark.unsafe.types.UTF8String @@ -194,20 +194,26 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } val cls = DataSource.lookupDataSource(source, sparkSession.sessionState.conf) - if (classOf[DataSourceV2].isAssignableFrom(cls)) { - val ds = cls.getConstructor().newInstance().asInstanceOf[DataSourceV2] - if (ds.isInstanceOf[BatchReadSupportProvider]) { - val sessionOptions = DataSourceV2Utils.extractSessionConfigs( - ds = ds, conf = sparkSession.sessionState.conf) - val pathsOption = { - val objectMapper = new ObjectMapper() - DataSourceOptions.PATHS_KEY -> objectMapper.writeValueAsString(paths.toArray) - } - Dataset.ofRows(sparkSession, DataSourceV2Relation.create( - ds, sessionOptions ++ extraOptions.toMap + pathsOption, - userSpecifiedSchema = userSpecifiedSchema)) - } else { - loadV1Source(paths: _*) + if (classOf[TableProvider].isAssignableFrom(cls)) { + val provider = cls.getConstructor().newInstance().asInstanceOf[TableProvider] + val sessionOptions = DataSourceV2Utils.extractSessionConfigs( + ds = provider, conf = sparkSession.sessionState.conf) + val pathsOption = { + val objectMapper = new ObjectMapper() + DataSourceOptions.PATHS_KEY -> objectMapper.writeValueAsString(paths.toArray) + } + val finalOptions = sessionOptions ++ extraOptions.toMap + pathsOption + val dsOptions = new DataSourceOptions(finalOptions.asJava) + val table = userSpecifiedSchema match { + case Some(schema) => provider.getTable(dsOptions, schema) + case _ => provider.getTable(dsOptions) + } + table match { + case s: SupportsBatchRead => + Dataset.ofRows(sparkSession, DataSourceV2Relation.create( + provider, s, finalOptions, userSpecifiedSchema = userSpecifiedSchema)) + + case _ => loadV1Source(paths: _*) } } else { loadV1Source(paths: _*) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 5a807d3d4b93..b9c4076994e9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -252,7 +252,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { val options = sessionOptions ++ extraOptions if (mode == SaveMode.Append) { - val relation = DataSourceV2Relation.create(source, options) + val relation = DataSourceV2Relation.createRelationForWrite(source, options) runCommand(df.sparkSession, "save") { AppendData.byName(relation, df.logicalPlan) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala index f7e29593a635..0a6b0afe6cfe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -22,13 +22,13 @@ import java.util.UUID import scala.collection.JavaConverters._ import org.apache.spark.sql.{AnalysisException, SaveMode} -import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NamedRelation} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} +import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.sources.DataSourceRegister -import org.apache.spark.sql.sources.v2.{BatchReadSupportProvider, BatchWriteSupportProvider, DataSourceOptions, DataSourceV2} -import org.apache.spark.sql.sources.v2.reader.{BatchReadSupport, ReadSupport, ScanConfigBuilder, SupportsReportStatistics} +import org.apache.spark.sql.sources.v2._ +import org.apache.spark.sql.sources.v2.reader._ import org.apache.spark.sql.sources.v2.writer.BatchWriteSupport import org.apache.spark.sql.types.StructType @@ -40,32 +40,38 @@ import org.apache.spark.sql.types.StructType * @param userSpecifiedSchema The user-specified schema for this scan. */ case class DataSourceV2Relation( - source: DataSourceV2, - readSupport: BatchReadSupport, + // TODO: remove `source` when we finish API refactor for write. + source: TableProvider, + table: SupportsBatchRead, output: Seq[AttributeReference], options: Map[String, String], - tableIdent: Option[TableIdentifier] = None, userSpecifiedSchema: Option[StructType] = None) - extends LeafNode with MultiInstanceRelation with NamedRelation with DataSourceV2StringFormat { + extends LeafNode with MultiInstanceRelation with NamedRelation { import DataSourceV2Relation._ - override def name: String = { - tableIdent.map(_.unquotedString).getOrElse(s"${source.name}:unknown") - } - - override def pushedFilters: Seq[Expression] = Seq.empty + override def name: String = table.name() - override def simpleString: String = "RelationV2 " + metadataString + override def simpleString: String = { + s"RelationV2${truncatedString(output, "[", ", ", "]")} $name" + } def newWriteSupport(): BatchWriteSupport = source.createWriteSupport(options, schema) - override def computeStats(): Statistics = readSupport match { - case r: SupportsReportStatistics => - val statistics = r.estimateStatistics(readSupport.newScanConfigBuilder().build()) - Statistics(sizeInBytes = statistics.sizeInBytes().orElse(conf.defaultSizeInBytes)) - case _ => - Statistics(sizeInBytes = conf.defaultSizeInBytes) + def newScanBuilder(): ScanBuilder = { + val dsOptions = new DataSourceOptions(options.asJava) + table.newScanBuilder(dsOptions) + } + + override def computeStats(): Statistics = { + val scan = newScanBuilder().build() + scan match { + case r: SupportsReportStatistics => + val statistics = r.estimateStatistics() + Statistics(sizeInBytes = statistics.sizeInBytes().orElse(conf.defaultSizeInBytes)) + case _ => + Statistics(sizeInBytes = conf.defaultSizeInBytes) + } } override def newInstance(): DataSourceV2Relation = { @@ -109,7 +115,7 @@ case class StreamingDataSourceV2Relation( } override def computeStats(): Statistics = readSupport match { - case r: SupportsReportStatistics => + 
case r: OldSupportsReportStatistics => val statistics = r.estimateStatistics(scanConfigBuilder.build()) Statistics(sizeInBytes = statistics.sizeInBytes().orElse(conf.defaultSizeInBytes)) case _ => @@ -119,15 +125,6 @@ case class StreamingDataSourceV2Relation( object DataSourceV2Relation { private implicit class SourceHelpers(source: DataSourceV2) { - def asReadSupportProvider: BatchReadSupportProvider = { - source match { - case provider: BatchReadSupportProvider => - provider - case _ => - throw new AnalysisException(s"Data source is not readable: $name") - } - } - def asWriteSupportProvider: BatchWriteSupportProvider = { source match { case provider: BatchWriteSupportProvider => @@ -146,18 +143,6 @@ object DataSourceV2Relation { } } - def createReadSupport( - options: Map[String, String], - userSpecifiedSchema: Option[StructType]): BatchReadSupport = { - val v2Options = new DataSourceOptions(options.asJava) - userSpecifiedSchema match { - case Some(s) => - asReadSupportProvider.createBatchReadSupport(s, v2Options) - case _ => - asReadSupportProvider.createBatchReadSupport(v2Options) - } - } - def createWriteSupport( options: Map[String, String], schema: StructType): BatchWriteSupport = { @@ -170,20 +155,21 @@ object DataSourceV2Relation { } def create( - source: DataSourceV2, + provider: TableProvider, + table: SupportsBatchRead, options: Map[String, String], - tableIdent: Option[TableIdentifier] = None, userSpecifiedSchema: Option[StructType] = None): DataSourceV2Relation = { - val readSupport = source.createReadSupport(options, userSpecifiedSchema) - val output = readSupport.fullSchema().toAttributes - val ident = tableIdent.orElse(tableFromOptions(options)) - DataSourceV2Relation( - source, readSupport, output, options, ident, userSpecifiedSchema) + val output = table.schema().toAttributes + DataSourceV2Relation(provider, table, output, options, userSpecifiedSchema) } - private def tableFromOptions(options: Map[String, String]): Option[TableIdentifier] = { - options - .get(DataSourceOptions.TABLE_KEY) - .map(TableIdentifier(_, options.get(DataSourceOptions.DATABASE_KEY))) + // TODO: remove this when we finish API refactor for write. 
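The computeStats change above pattern-matches the freshly built Scan against SupportsReportStatistics, so a source that wants accurate sizing mixes the interface into the same object. A rough sketch follows; it assumes Statistics exposes sizeInBytes() and numRows() as java.util.OptionalLong (only sizeInBytes() is visible in this diff), and sizeOnDisk is an invented field supplied by the concrete source.

import java.util.OptionalLong
import org.apache.spark.sql.sources.v2.reader.{Statistics, SupportsReportStatistics}

trait ExampleReportsStatistics extends SupportsReportStatistics {
  // Assumed to be provided by the concrete source, e.g. from file metadata.
  def sizeOnDisk: Long

  override def estimateStatistics(): Statistics = new Statistics {
    override def sizeInBytes(): OptionalLong = OptionalLong.of(sizeOnDisk)
    override def numRows(): OptionalLong = OptionalLong.empty()
  }
}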
+ def createRelationForWrite( + source: DataSourceV2, + options: Map[String, String]): DataSourceV2Relation = { + val provider = source.asInstanceOf[TableProvider] + val dsOptions = new DataSourceOptions(options.asJava) + val table = provider.getTable(dsOptions) + create(provider, table.asInstanceOf[SupportsBatchRead], options) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala index 25f86a66a826..725bcc3af3ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala @@ -22,60 +22,47 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical import org.apache.spark.sql.catalyst.plans.physical.SinglePartition +import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.{ColumnarBatchScan, LeafExecNode, WholeStageCodegenExec} -import org.apache.spark.sql.execution.streaming.continuous._ -import org.apache.spark.sql.sources.v2.DataSourceV2 import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousPartitionReaderFactory, ContinuousReadSupport, MicroBatchReadSupport} /** - * Physical plan node for scanning data from a data source. + * Physical plan node for scanning a batch of data from a data source. */ case class DataSourceV2ScanExec( output: Seq[AttributeReference], - @transient source: DataSourceV2, - @transient options: Map[String, String], - @transient pushedFilters: Seq[Expression], - @transient readSupport: ReadSupport, - @transient scanConfig: ScanConfig) - extends LeafExecNode with DataSourceV2StringFormat with ColumnarBatchScan { + scanDesc: String, + @transient batch: Batch) + extends LeafExecNode with ColumnarBatchScan { - override def simpleString: String = "ScanV2 " + metadataString + override def simpleString: String = { + s"ScanV2${truncatedString(output, "[", ", ", "]")} $scanDesc" + } // TODO: unify the equal/hashCode implementation for all data source v2 query plans. 
override def equals(other: Any): Boolean = other match { - case other: DataSourceV2ScanExec => - output == other.output && readSupport.getClass == other.readSupport.getClass && - options == other.options + case other: DataSourceV2ScanExec => this.batch == other.batch case _ => false } - override def hashCode(): Int = { - Seq(output, source, options).hashCode() - } + override def hashCode(): Int = batch.hashCode() + + private lazy val partitions = batch.planInputPartitions() + + private lazy val readerFactory = batch.createReaderFactory() - override def outputPartitioning: physical.Partitioning = readSupport match { + override def outputPartitioning: physical.Partitioning = batch match { case _ if partitions.length == 1 => SinglePartition case s: SupportsReportPartitioning => new DataSourcePartitioning( - s.outputPartitioning(scanConfig), AttributeMap(output.map(a => a -> a.name))) + s.outputPartitioning(), AttributeMap(output.map(a => a -> a.name))) case _ => super.outputPartitioning } - private lazy val partitions: Seq[InputPartition] = readSupport.planInputPartitions(scanConfig) - - private lazy val readerFactory = readSupport match { - case r: BatchReadSupport => r.createReaderFactory(scanConfig) - case r: MicroBatchReadSupport => r.createReaderFactory(scanConfig) - case r: ContinuousReadSupport => r.createContinuousReaderFactory(scanConfig) - case _ => throw new IllegalStateException("unknown read support: " + readSupport) - } - - // TODO: clean this up when we have dedicated scan plan for continuous streaming. - override val supportsBatch: Boolean = { + override def supportsBatch: Boolean = { require(partitions.forall(readerFactory.supportColumnarReads) || !partitions.exists(readerFactory.supportColumnarReads), "Cannot mix row-based and columnar input partitions.") @@ -83,25 +70,8 @@ case class DataSourceV2ScanExec( partitions.exists(readerFactory.supportColumnarReads) } - private lazy val inputRDD: RDD[InternalRow] = readSupport match { - case _: ContinuousReadSupport => - assert(!supportsBatch, - "continuous stream reader does not support columnar read yet.") - EpochCoordinatorRef.get( - sparkContext.getLocalProperty(ContinuousExecution.EPOCH_COORDINATOR_ID_KEY), - sparkContext.env) - .askSync[Unit](SetReaderPartitions(partitions.size)) - new ContinuousDataSourceRDD( - sparkContext, - sqlContext.conf.continuousStreamingExecutorQueueSize, - sqlContext.conf.continuousStreamingExecutorPollIntervalMs, - partitions, - schema, - readerFactory.asInstanceOf[ContinuousPartitionReaderFactory]) - - case _ => - new DataSourceRDD( - sparkContext, partitions, readerFactory.asInstanceOf[PartitionReaderFactory], supportsBatch) + private lazy val inputRDD: RDD[InternalRow] = { + new DataSourceRDD(sparkContext, partitions, readerFactory, supportsBatch) } override def inputRDDs(): Seq[RDD[InternalRow]] = Seq(inputRDD) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 9a3109e7c199..2e26fce880b6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -37,9 +37,9 @@ object DataSourceV2Strategy extends Strategy { * @return pushed filter and post-scan filters. 
*/ private def pushFilters( - configBuilder: ScanConfigBuilder, + scanBuilder: ScanBuilder, filters: Seq[Expression]): (Seq[Expression], Seq[Expression]) = { - configBuilder match { + scanBuilder match { case r: SupportsPushDownFilters => // A map from translated data source filters to original catalyst filter expressions. val translatedFilterToExpr = mutable.HashMap.empty[sources.Filter, Expression] @@ -76,18 +76,18 @@ object DataSourceV2Strategy extends Strategy { */ // TODO: nested column pruning. private def pruneColumns( - configBuilder: ScanConfigBuilder, + scanBuilder: ScanBuilder, relation: DataSourceV2Relation, - exprs: Seq[Expression]): (ScanConfig, Seq[AttributeReference]) = { - configBuilder match { + exprs: Seq[Expression]): (Scan, Seq[AttributeReference]) = { + scanBuilder match { case r: SupportsPushDownRequiredColumns => val requiredColumns = AttributeSet(exprs.flatMap(_.references)) val neededOutput = relation.output.filter(requiredColumns.contains) if (neededOutput != relation.output) { r.pruneColumns(neededOutput.toStructType) - val config = r.build() + val scan = r.build() val nameToAttr = relation.output.map(_.name).zip(relation.output).toMap - config -> config.readSchema().toAttributes.map { + scan -> scan.readSchema().toAttributes.map { // We have to keep the attribute id during transformation. a => a.withExprId(nameToAttr(a.name).exprId) } @@ -95,19 +95,19 @@ object DataSourceV2Strategy extends Strategy { r.build() -> relation.output } - case _ => configBuilder.build() -> relation.output + case _ => scanBuilder.build() -> relation.output } } override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case PhysicalOperation(project, filters, relation: DataSourceV2Relation) => - val configBuilder = relation.readSupport.newScanConfigBuilder() + val scanBuilder = relation.newScanBuilder() // `pushedFilters` will be pushed down and evaluated in the underlying data sources. // `postScanFilters` need to be evaluated after the scan. // `postScanFilters` and `pushedFilters` can overlap, e.g. the parquet row group filter. 
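pushFilters above splits predicates into pushed and post-scan filters by delegating to SupportsPushDownFilters on the builder. Below is a minimal sketch of the source side of that hand-shake, with illustrative names, accepting only GreaterThan filters on a column "i" and handing everything else back to Spark.

import org.apache.spark.sql.sources.{Filter, GreaterThan}
import org.apache.spark.sql.sources.v2.reader.{Scan, ScanBuilder, SupportsPushDownFilters}
import org.apache.spark.sql.types.StructType

class ExamplePushDownBuilder extends ScanBuilder with SupportsPushDownFilters {
  private var pushed: Array[Filter] = Array.empty

  override def pushFilters(filters: Array[Filter]): Array[Filter] = {
    val (supported, unsupported) = filters.partition {
      case GreaterThan("i", _) => true
      case _ => false
    }
    pushed = supported
    // Whatever is returned here becomes a post-scan filter evaluated by Spark,
    // so it may safely overlap with `pushed`.
    unsupported
  }

  override def pushedFilters(): Array[Filter] = pushed

  override def build(): Scan = new Scan {
    // A real source would build a Scan/Batch that honours `pushed`;
    // this stub only reports the schema.
    override def readSchema(): StructType = new StructType().add("i", "int")
  }
}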
- val (pushedFilters, postScanFilters) = pushFilters(configBuilder, filters) - val (config, output) = pruneColumns(configBuilder, relation, project ++ postScanFilters) + val (pushedFilters, postScanFilters) = pushFilters(scanBuilder, filters) + val (scan, output) = pruneColumns(scanBuilder, relation, project ++ postScanFilters) logInfo( s""" |Pushing operators to ${relation.source.getClass} @@ -116,16 +116,10 @@ object DataSourceV2Strategy extends Strategy { |Output: ${output.mkString(", ")} """.stripMargin) - val scan = DataSourceV2ScanExec( - output, - relation.source, - relation.options, - pushedFilters, - relation.readSupport, - config) + val plan = DataSourceV2ScanExec(output, scan.description(), scan.toBatch) val filterCondition = postScanFilters.reduceLeftOption(And) - val withFilter = filterCondition.map(FilterExec(_, scan)).getOrElse(scan) + val withFilter = filterCondition.map(FilterExec(_, plan)).getOrElse(plan) // always add the projection, which will produce unsafe rows required by some operators ProjectExec(project, withFilter) :: Nil @@ -135,7 +129,7 @@ object DataSourceV2Strategy extends Strategy { val scanConfig = r.scanConfigBuilder.build() // ensure there is a projection, which will produce unsafe rows required by some operators ProjectExec(r.output, - DataSourceV2ScanExec( + DataSourceV2StreamingScanExec( r.output, r.source, r.options, r.pushedFilters, r.readSupport, scanConfig)) :: Nil case WriteToDataSourceV2(writer, query) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StreamingScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StreamingScanExec.scala new file mode 100644 index 000000000000..c87294090996 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StreamingScanExec.scala @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.physical +import org.apache.spark.sql.catalyst.plans.physical.SinglePartition +import org.apache.spark.sql.execution.{ColumnarBatchScan, LeafExecNode, WholeStageCodegenExec} +import org.apache.spark.sql.execution.streaming.continuous._ +import org.apache.spark.sql.sources.v2.DataSourceV2 +import org.apache.spark.sql.sources.v2.reader._ +import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousPartitionReaderFactory, ContinuousReadSupport, MicroBatchReadSupport} + +/** + * Physical plan node for scanning data from a data source. 
+ */ +// TODO: micro-batch should be handled by `DataSourceV2ScanExec`, after we finish the API refactor +// completely. +case class DataSourceV2StreamingScanExec( + output: Seq[AttributeReference], + @transient source: DataSourceV2, + @transient options: Map[String, String], + @transient pushedFilters: Seq[Expression], + @transient readSupport: ReadSupport, + @transient scanConfig: ScanConfig) + extends LeafExecNode with DataSourceV2StringFormat with ColumnarBatchScan { + + override def simpleString: String = "ScanV2 " + metadataString + + // TODO: unify the equal/hashCode implementation for all data source v2 query plans. + override def equals(other: Any): Boolean = other match { + case other: DataSourceV2StreamingScanExec => + output == other.output && readSupport.getClass == other.readSupport.getClass && + options == other.options + case _ => false + } + + override def hashCode(): Int = { + Seq(output, source, options).hashCode() + } + + override def outputPartitioning: physical.Partitioning = readSupport match { + case _ if partitions.length == 1 => + SinglePartition + + case s: OldSupportsReportPartitioning => + new DataSourcePartitioning( + s.outputPartitioning(scanConfig), AttributeMap(output.map(a => a -> a.name))) + + case _ => super.outputPartitioning + } + + private lazy val partitions: Seq[InputPartition] = readSupport.planInputPartitions(scanConfig) + + private lazy val readerFactory = readSupport match { + case r: MicroBatchReadSupport => r.createReaderFactory(scanConfig) + case r: ContinuousReadSupport => r.createContinuousReaderFactory(scanConfig) + case _ => throw new IllegalStateException("unknown read support: " + readSupport) + } + + override val supportsBatch: Boolean = { + require(partitions.forall(readerFactory.supportColumnarReads) || + !partitions.exists(readerFactory.supportColumnarReads), + "Cannot mix row-based and columnar input partitions.") + + partitions.exists(readerFactory.supportColumnarReads) + } + + private lazy val inputRDD: RDD[InternalRow] = readSupport match { + case _: ContinuousReadSupport => + assert(!supportsBatch, + "continuous stream reader does not support columnar read yet.") + EpochCoordinatorRef.get( + sparkContext.getLocalProperty(ContinuousExecution.EPOCH_COORDINATOR_ID_KEY), + sparkContext.env) + .askSync[Unit](SetReaderPartitions(partitions.size)) + new ContinuousDataSourceRDD( + sparkContext, + sqlContext.conf.continuousStreamingExecutorQueueSize, + sqlContext.conf.continuousStreamingExecutorPollIntervalMs, + partitions, + schema, + readerFactory.asInstanceOf[ContinuousPartitionReaderFactory]) + + case _ => + new DataSourceRDD( + sparkContext, partitions, readerFactory.asInstanceOf[PartitionReaderFactory], supportsBatch) + } + + override def inputRDDs(): Seq[RDD[InternalRow]] = Seq(inputRDD) + + override protected def doExecute(): RDD[InternalRow] = { + if (supportsBatch) { + WholeStageCodegenExec(this)(codegenStageId = 0).execute() + } else { + val numOutputRows = longMetric("numOutputRows") + inputRDD.map { r => + numOutputRows += 1 + r + } + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index 392229bcb5f5..6a22f0cc8431 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.SparkSession import 
org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, LogicalPlan} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.QueryExecution -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExec +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2StreamingScanExec import org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReadSupport import org.apache.spark.sql.streaming._ import org.apache.spark.sql.streaming.StreamingQueryListener.QueryProgressEvent @@ -256,7 +256,7 @@ trait ProgressReporter extends Logging { // (can happen with self-unions or self-joins). This means the source is scanned multiple // times in the query, we should count the numRows for each scan. val sourceToInputRowsTuples = lastExecution.executedPlan.collect { - case s: DataSourceV2ScanExec if s.readSupport.isInstanceOf[BaseStreamingSource] => + case s: DataSourceV2StreamingScanExec if s.readSupport.isInstanceOf[BaseStreamingSource] => val numRows = s.metrics.get("numOutputRows").map(_.value).getOrElse(0L) val source = s.readSupport.asInstanceOf[BaseStreamingSource] source -> numRows diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 1eab55122e84..af23c5cd3d80 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Curre import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.SQLExecution -import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanExec, StreamingDataSourceV2Relation} +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2StreamingScanExec, StreamingDataSourceV2Relation} import org.apache.spark.sql.execution.streaming.{ContinuousExecutionRelation, StreamingRelationV2, _} import org.apache.spark.sql.sources.v2 import org.apache.spark.sql.sources.v2.{ContinuousReadSupportProvider, DataSourceOptions, StreamingWriteSupportProvider} @@ -206,7 +206,8 @@ class ContinuousExecution( } val (readSupport, scanConfig) = lastExecution.executedPlan.collect { - case scan: DataSourceV2ScanExec if scan.readSupport.isInstanceOf[ContinuousReadSupport] => + case scan: DataSourceV2StreamingScanExec + if scan.readSupport.isInstanceOf[ContinuousReadSupport] => scan.readSupport.asInstanceOf[ContinuousReadSupport] -> scan.scanConfig }.head diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaAdvancedDataSourceV2.java b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaAdvancedDataSourceV2.java index 5602310219a7..2612b6185fd4 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaAdvancedDataSourceV2.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaAdvancedDataSourceV2.java @@ -24,62 +24,29 @@ import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; import org.apache.spark.sql.sources.Filter; import org.apache.spark.sql.sources.GreaterThan; -import org.apache.spark.sql.sources.v2.BatchReadSupportProvider; import org.apache.spark.sql.sources.v2.DataSourceOptions; -import 
org.apache.spark.sql.sources.v2.DataSourceV2; +import org.apache.spark.sql.sources.v2.Table; +import org.apache.spark.sql.sources.v2.TableProvider; import org.apache.spark.sql.sources.v2.reader.*; import org.apache.spark.sql.types.StructType; -public class JavaAdvancedDataSourceV2 implements DataSourceV2, BatchReadSupportProvider { +public class JavaAdvancedDataSourceV2 implements TableProvider { - public class ReadSupport extends JavaSimpleReadSupport { - @Override - public ScanConfigBuilder newScanConfigBuilder() { - return new AdvancedScanConfigBuilder(); - } - - @Override - public InputPartition[] planInputPartitions(ScanConfig config) { - Filter[] filters = ((AdvancedScanConfigBuilder) config).filters; - List res = new ArrayList<>(); - - Integer lowerBound = null; - for (Filter filter : filters) { - if (filter instanceof GreaterThan) { - GreaterThan f = (GreaterThan) filter; - if ("i".equals(f.attribute()) && f.value() instanceof Integer) { - lowerBound = (Integer) f.value(); - break; - } - } - } - - if (lowerBound == null) { - res.add(new JavaRangeInputPartition(0, 5)); - res.add(new JavaRangeInputPartition(5, 10)); - } else if (lowerBound < 4) { - res.add(new JavaRangeInputPartition(lowerBound + 1, 5)); - res.add(new JavaRangeInputPartition(5, 10)); - } else if (lowerBound < 9) { - res.add(new JavaRangeInputPartition(lowerBound + 1, 10)); + @Override + public Table getTable(DataSourceOptions options) { + return new JavaSimpleBatchTable() { + @Override + public ScanBuilder newScanBuilder(DataSourceOptions options) { + return new AdvancedScanBuilder(); } - - return res.stream().toArray(InputPartition[]::new); - } - - @Override - public PartitionReaderFactory createReaderFactory(ScanConfig config) { - StructType requiredSchema = ((AdvancedScanConfigBuilder) config).requiredSchema; - return new AdvancedReaderFactory(requiredSchema); - } + }; } - public static class AdvancedScanConfigBuilder implements ScanConfigBuilder, ScanConfig, + static class AdvancedScanBuilder implements ScanBuilder, Scan, SupportsPushDownFilters, SupportsPushDownRequiredColumns { - // Exposed for testing. - public StructType requiredSchema = new StructType().add("i", "int").add("j", "int"); - public Filter[] filters = new Filter[0]; + private StructType requiredSchema = new StructType().add("i", "int").add("j", "int"); + private Filter[] filters = new Filter[0]; @Override public void pruneColumns(StructType requiredSchema) { @@ -121,9 +88,58 @@ public Filter[] pushedFilters() { } @Override - public ScanConfig build() { + public Scan build() { return this; } + + @Override + public Batch toBatch() { + return new AdvancedBatch(requiredSchema, filters); + } + } + + public static class AdvancedBatch implements Batch { + // Exposed for testing. 
+ public StructType requiredSchema; + public Filter[] filters; + + AdvancedBatch(StructType requiredSchema, Filter[] filters) { + this.requiredSchema = requiredSchema; + this.filters = filters; + } + + @Override + public InputPartition[] planInputPartitions() { + List res = new ArrayList<>(); + + Integer lowerBound = null; + for (Filter filter : filters) { + if (filter instanceof GreaterThan) { + GreaterThan f = (GreaterThan) filter; + if ("i".equals(f.attribute()) && f.value() instanceof Integer) { + lowerBound = (Integer) f.value(); + break; + } + } + } + + if (lowerBound == null) { + res.add(new JavaRangeInputPartition(0, 5)); + res.add(new JavaRangeInputPartition(5, 10)); + } else if (lowerBound < 4) { + res.add(new JavaRangeInputPartition(lowerBound + 1, 5)); + res.add(new JavaRangeInputPartition(5, 10)); + } else if (lowerBound < 9) { + res.add(new JavaRangeInputPartition(lowerBound + 1, 10)); + } + + return res.stream().toArray(InputPartition[]::new); + } + + @Override + public PartitionReaderFactory createReaderFactory() { + return new AdvancedReaderFactory(requiredSchema); + } } static class AdvancedReaderFactory implements PartitionReaderFactory { @@ -165,10 +181,4 @@ public void close() throws IOException { }; } } - - - @Override - public BatchReadSupport createBatchReadSupport(DataSourceOptions options) { - return new ReadSupport(); - } } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaColumnarDataSourceV2.java b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaColumnarDataSourceV2.java index 28a933039831..d72ab5338aa8 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaColumnarDataSourceV2.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaColumnarDataSourceV2.java @@ -21,21 +21,21 @@ import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector; -import org.apache.spark.sql.sources.v2.BatchReadSupportProvider; import org.apache.spark.sql.sources.v2.DataSourceOptions; -import org.apache.spark.sql.sources.v2.DataSourceV2; +import org.apache.spark.sql.sources.v2.Table; +import org.apache.spark.sql.sources.v2.TableProvider; import org.apache.spark.sql.sources.v2.reader.*; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.vectorized.ColumnVector; import org.apache.spark.sql.vectorized.ColumnarBatch; -public class JavaColumnarDataSourceV2 implements DataSourceV2, BatchReadSupportProvider { +public class JavaColumnarDataSourceV2 implements TableProvider { - class ReadSupport extends JavaSimpleReadSupport { + class MyScanBuilder extends JavaSimpleScanBuilder { @Override - public InputPartition[] planInputPartitions(ScanConfig config) { + public InputPartition[] planInputPartitions() { InputPartition[] partitions = new InputPartition[2]; partitions[0] = new JavaRangeInputPartition(0, 50); partitions[1] = new JavaRangeInputPartition(50, 90); @@ -43,11 +43,21 @@ public InputPartition[] planInputPartitions(ScanConfig config) { } @Override - public PartitionReaderFactory createReaderFactory(ScanConfig config) { + public PartitionReaderFactory createReaderFactory() { return new ColumnarReaderFactory(); } } + @Override + public Table getTable(DataSourceOptions options) { + return new JavaSimpleBatchTable() { + @Override + public ScanBuilder newScanBuilder(DataSourceOptions options) { + return new MyScanBuilder(); + } + }; + } + static class ColumnarReaderFactory implements PartitionReaderFactory { private static final int 
BATCH_SIZE = 20; @@ -106,9 +116,4 @@ public void close() throws IOException { }; } } - - @Override - public BatchReadSupport createBatchReadSupport(DataSourceOptions options) { - return new ReadSupport(); - } } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaPartitionAwareDataSource.java b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaPartitionAwareDataSource.java index 18a11dde8219..a513bfb26ef1 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaPartitionAwareDataSource.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaPartitionAwareDataSource.java @@ -22,18 +22,20 @@ import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; -import org.apache.spark.sql.sources.v2.*; +import org.apache.spark.sql.sources.v2.DataSourceOptions; +import org.apache.spark.sql.sources.v2.Table; +import org.apache.spark.sql.sources.v2.TableProvider; import org.apache.spark.sql.sources.v2.reader.*; import org.apache.spark.sql.sources.v2.reader.partitioning.ClusteredDistribution; import org.apache.spark.sql.sources.v2.reader.partitioning.Distribution; import org.apache.spark.sql.sources.v2.reader.partitioning.Partitioning; -public class JavaPartitionAwareDataSource implements DataSourceV2, BatchReadSupportProvider { +public class JavaPartitionAwareDataSource implements TableProvider { - class ReadSupport extends JavaSimpleReadSupport implements SupportsReportPartitioning { + class MyScanBuilder extends JavaSimpleScanBuilder implements SupportsReportPartitioning { @Override - public InputPartition[] planInputPartitions(ScanConfig config) { + public InputPartition[] planInputPartitions() { InputPartition[] partitions = new InputPartition[2]; partitions[0] = new SpecificInputPartition(new int[]{1, 1, 3}, new int[]{4, 4, 6}); partitions[1] = new SpecificInputPartition(new int[]{2, 4, 4}, new int[]{6, 2, 2}); @@ -41,16 +43,26 @@ public InputPartition[] planInputPartitions(ScanConfig config) { } @Override - public PartitionReaderFactory createReaderFactory(ScanConfig config) { + public PartitionReaderFactory createReaderFactory() { return new SpecificReaderFactory(); } @Override - public Partitioning outputPartitioning(ScanConfig config) { + public Partitioning outputPartitioning() { return new MyPartitioning(); } } + @Override + public Table getTable(DataSourceOptions options) { + return new JavaSimpleBatchTable() { + @Override + public ScanBuilder newScanBuilder(DataSourceOptions options) { + return new MyScanBuilder(); + } + }; + } + static class MyPartitioning implements Partitioning { @Override @@ -106,9 +118,4 @@ public void close() throws IOException { }; } } - - @Override - public BatchReadSupport createBatchReadSupport(DataSourceOptions options) { - return new ReadSupport(); - } } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSchemaRequiredDataSource.java b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSchemaRequiredDataSource.java index cc9ac04a0dad..815d57ba9413 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSchemaRequiredDataSource.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSchemaRequiredDataSource.java @@ -17,39 +17,51 @@ package test.org.apache.spark.sql.sources.v2; -import org.apache.spark.sql.sources.v2.BatchReadSupportProvider; import org.apache.spark.sql.sources.v2.DataSourceOptions; -import org.apache.spark.sql.sources.v2.DataSourceV2; 
+import org.apache.spark.sql.sources.v2.Table; +import org.apache.spark.sql.sources.v2.TableProvider; import org.apache.spark.sql.sources.v2.reader.*; import org.apache.spark.sql.types.StructType; -public class JavaSchemaRequiredDataSource implements DataSourceV2, BatchReadSupportProvider { +public class JavaSchemaRequiredDataSource implements TableProvider { - class ReadSupport extends JavaSimpleReadSupport { - private final StructType schema; + class MyScanBuilder extends JavaSimpleScanBuilder { - ReadSupport(StructType schema) { + private StructType schema; + + MyScanBuilder(StructType schema) { this.schema = schema; } @Override - public StructType fullSchema() { + public StructType readSchema() { return schema; } @Override - public InputPartition[] planInputPartitions(ScanConfig config) { + public InputPartition[] planInputPartitions() { return new InputPartition[0]; } } @Override - public BatchReadSupport createBatchReadSupport(DataSourceOptions options) { - throw new IllegalArgumentException("requires a user-supplied schema"); + public Table getTable(DataSourceOptions options, StructType schema) { + return new JavaSimpleBatchTable() { + + @Override + public StructType schema() { + return schema; + } + + @Override + public ScanBuilder newScanBuilder(DataSourceOptions options) { + return new MyScanBuilder(schema); + } + }; } @Override - public BatchReadSupport createBatchReadSupport(StructType schema, DataSourceOptions options) { - return new ReadSupport(schema); + public Table getTable(DataSourceOptions options) { + throw new IllegalArgumentException("requires a user-supplied schema"); } } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleReadSupport.java b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleBatchTable.java similarity index 78% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleReadSupport.java rename to sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleBatchTable.java index ced51dde6997..cb5954d5a621 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleReadSupport.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleBatchTable.java @@ -21,43 +21,44 @@ import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; +import org.apache.spark.sql.sources.v2.SupportsBatchRead; +import org.apache.spark.sql.sources.v2.Table; import org.apache.spark.sql.sources.v2.reader.*; import org.apache.spark.sql.types.StructType; -abstract class JavaSimpleReadSupport implements BatchReadSupport { +abstract class JavaSimpleBatchTable implements Table, SupportsBatchRead { @Override - public StructType fullSchema() { + public StructType schema() { return new StructType().add("i", "int").add("j", "int"); } @Override - public ScanConfigBuilder newScanConfigBuilder() { - return new JavaNoopScanConfigBuilder(fullSchema()); - } - - @Override - public PartitionReaderFactory createReaderFactory(ScanConfig config) { - return new JavaSimpleReaderFactory(); + public String name() { + return this.getClass().toString(); } } -class JavaNoopScanConfigBuilder implements ScanConfigBuilder, ScanConfig { - - private StructType schema; +abstract class JavaSimpleScanBuilder implements ScanBuilder, Scan, Batch { - JavaNoopScanConfigBuilder(StructType schema) { - this.schema = schema; + @Override + public Scan build() { + return this; } @Override - public ScanConfig build() { + public Batch 
toBatch() { return this; } @Override public StructType readSchema() { - return schema; + return new StructType().add("i", "int").add("j", "int"); + } + + @Override + public PartitionReaderFactory createReaderFactory() { + return new JavaSimpleReaderFactory(); } } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleDataSourceV2.java b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleDataSourceV2.java index 2cdbba84ec4a..852c4546df88 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleDataSourceV2.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleDataSourceV2.java @@ -17,17 +17,17 @@ package test.org.apache.spark.sql.sources.v2; -import org.apache.spark.sql.sources.v2.BatchReadSupportProvider; -import org.apache.spark.sql.sources.v2.DataSourceV2; import org.apache.spark.sql.sources.v2.DataSourceOptions; +import org.apache.spark.sql.sources.v2.Table; +import org.apache.spark.sql.sources.v2.TableProvider; import org.apache.spark.sql.sources.v2.reader.*; -public class JavaSimpleDataSourceV2 implements DataSourceV2, BatchReadSupportProvider { +public class JavaSimpleDataSourceV2 implements TableProvider { - class ReadSupport extends JavaSimpleReadSupport { + class MyScanBuilder extends JavaSimpleScanBuilder { @Override - public InputPartition[] planInputPartitions(ScanConfig config) { + public InputPartition[] planInputPartitions() { InputPartition[] partitions = new InputPartition[2]; partitions[0] = new JavaRangeInputPartition(0, 5); partitions[1] = new JavaRangeInputPartition(5, 10); @@ -36,7 +36,12 @@ public InputPartition[] planInputPartitions(ScanConfig config) { } @Override - public BatchReadSupport createBatchReadSupport(DataSourceOptions options) { - return new ReadSupport(); + public Table getTable(DataSourceOptions options) { + return new JavaSimpleBatchTable() { + @Override + public ScanBuilder newScanBuilder(DataSourceOptions options) { + return new MyScanBuilder(); + } + }; } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala index e8f291af13ba..d282193d35d7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala @@ -38,18 +38,17 @@ import org.apache.spark.sql.vectorized.ColumnarBatch class DataSourceV2Suite extends QueryTest with SharedSQLContext { import testImplicits._ - private def getScanConfig(query: DataFrame): AdvancedScanConfigBuilder = { + private def getBatch(query: DataFrame): AdvancedBatch = { query.queryExecution.executedPlan.collect { case d: DataSourceV2ScanExec => - d.scanConfig.asInstanceOf[AdvancedScanConfigBuilder] + d.batch.asInstanceOf[AdvancedBatch] }.head } - private def getJavaScanConfig( - query: DataFrame): JavaAdvancedDataSourceV2.AdvancedScanConfigBuilder = { + private def getJavaBatch(query: DataFrame): JavaAdvancedDataSourceV2.AdvancedBatch = { query.queryExecution.executedPlan.collect { case d: DataSourceV2ScanExec => - d.scanConfig.asInstanceOf[JavaAdvancedDataSourceV2.AdvancedScanConfigBuilder] + d.batch.asInstanceOf[JavaAdvancedDataSourceV2.AdvancedBatch] }.head } @@ -73,51 +72,51 @@ class DataSourceV2Suite extends QueryTest with SharedSQLContext { val q1 = df.select('j) checkAnswer(q1, (0 until 10).map(i => Row(-i))) if (cls == classOf[AdvancedDataSourceV2]) { - val config = 
getScanConfig(q1) - assert(config.filters.isEmpty) - assert(config.requiredSchema.fieldNames === Seq("j")) + val batch = getBatch(q1) + assert(batch.filters.isEmpty) + assert(batch.requiredSchema.fieldNames === Seq("j")) } else { - val config = getJavaScanConfig(q1) - assert(config.filters.isEmpty) - assert(config.requiredSchema.fieldNames === Seq("j")) + val batch = getJavaBatch(q1) + assert(batch.filters.isEmpty) + assert(batch.requiredSchema.fieldNames === Seq("j")) } val q2 = df.filter('i > 3) checkAnswer(q2, (4 until 10).map(i => Row(i, -i))) if (cls == classOf[AdvancedDataSourceV2]) { - val config = getScanConfig(q2) - assert(config.filters.flatMap(_.references).toSet == Set("i")) - assert(config.requiredSchema.fieldNames === Seq("i", "j")) + val batch = getBatch(q2) + assert(batch.filters.flatMap(_.references).toSet == Set("i")) + assert(batch.requiredSchema.fieldNames === Seq("i", "j")) } else { - val config = getJavaScanConfig(q2) - assert(config.filters.flatMap(_.references).toSet == Set("i")) - assert(config.requiredSchema.fieldNames === Seq("i", "j")) + val batch = getJavaBatch(q2) + assert(batch.filters.flatMap(_.references).toSet == Set("i")) + assert(batch.requiredSchema.fieldNames === Seq("i", "j")) } val q3 = df.select('i).filter('i > 6) checkAnswer(q3, (7 until 10).map(i => Row(i))) if (cls == classOf[AdvancedDataSourceV2]) { - val config = getScanConfig(q3) - assert(config.filters.flatMap(_.references).toSet == Set("i")) - assert(config.requiredSchema.fieldNames === Seq("i")) + val batch = getBatch(q3) + assert(batch.filters.flatMap(_.references).toSet == Set("i")) + assert(batch.requiredSchema.fieldNames === Seq("i")) } else { - val config = getJavaScanConfig(q3) - assert(config.filters.flatMap(_.references).toSet == Set("i")) - assert(config.requiredSchema.fieldNames === Seq("i")) + val batch = getJavaBatch(q3) + assert(batch.filters.flatMap(_.references).toSet == Set("i")) + assert(batch.requiredSchema.fieldNames === Seq("i")) } val q4 = df.select('j).filter('j < -10) checkAnswer(q4, Nil) if (cls == classOf[AdvancedDataSourceV2]) { - val config = getScanConfig(q4) + val batch = getBatch(q4) // 'j < 10 is not supported by the testing data source. - assert(config.filters.isEmpty) - assert(config.requiredSchema.fieldNames === Seq("j")) + assert(batch.filters.isEmpty) + assert(batch.requiredSchema.fieldNames === Seq("j")) } else { - val config = getJavaScanConfig(q4) + val batch = getJavaBatch(q4) // 'j < 10 is not supported by the testing data source. 
- assert(config.filters.isEmpty) - assert(config.requiredSchema.fieldNames === Seq("j")) + assert(batch.filters.isEmpty) + assert(batch.requiredSchema.fieldNames === Seq("j")) } } } @@ -279,26 +278,26 @@ class DataSourceV2Suite extends QueryTest with SharedSQLContext { val q1 = df.select('i + 1) checkAnswer(q1, (1 until 11).map(i => Row(i))) - val config1 = getScanConfig(q1) - assert(config1.requiredSchema.fieldNames === Seq("i")) + val batch1 = getBatch(q1) + assert(batch1.requiredSchema.fieldNames === Seq("i")) val q2 = df.select(lit(1)) checkAnswer(q2, (0 until 10).map(i => Row(1))) - val config2 = getScanConfig(q2) - assert(config2.requiredSchema.isEmpty) + val batch2 = getBatch(q2) + assert(batch2.requiredSchema.isEmpty) // 'j === 1 can't be pushed down, but we should still be able do column pruning val q3 = df.filter('j === -1).select('j * 2) checkAnswer(q3, Row(-2)) - val config3 = getScanConfig(q3) - assert(config3.filters.isEmpty) - assert(config3.requiredSchema.fieldNames === Seq("j")) + val batch3 = getBatch(q3) + assert(batch3.filters.isEmpty) + assert(batch3.requiredSchema.fieldNames === Seq("j")) // column pruning should work with other operators. val q4 = df.sort('i).limit(1).select('i + 1) checkAnswer(q4, Row(1)) - val config4 = getScanConfig(q4) - assert(config4.requiredSchema.fieldNames === Seq("i")) + val batch4 = getBatch(q4) + assert(batch4.requiredSchema.fieldNames === Seq("i")) } test("SPARK-23315: get output from canonicalized data source v2 related plans") { @@ -374,10 +373,6 @@ class DataSourceV2Suite extends QueryTest with SharedSQLContext { case class RangeInputPartition(start: Int, end: Int) extends InputPartition -case class NoopScanConfigBuilder(readSchema: StructType) extends ScanConfigBuilder with ScanConfig { - override def build(): ScanConfig = this -} - object SimpleReaderFactory extends PartitionReaderFactory { override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { val RangeInputPartition(start, end) = partition @@ -396,87 +391,68 @@ object SimpleReaderFactory extends PartitionReaderFactory { } } -abstract class SimpleReadSupport extends BatchReadSupport { - override def fullSchema(): StructType = new StructType().add("i", "int").add("j", "int") +abstract class SimpleBatchTable extends Table with SupportsBatchRead { - override def newScanConfigBuilder(): ScanConfigBuilder = { - NoopScanConfigBuilder(fullSchema()) - } + override def schema(): StructType = new StructType().add("i", "int").add("j", "int") - override def createReaderFactory(config: ScanConfig): PartitionReaderFactory = { - SimpleReaderFactory - } + override def name(): String = this.getClass.toString } +abstract class SimpleScanBuilder extends ScanBuilder + with Batch with Scan { + + override def build(): Scan = this + + override def toBatch: Batch = this -class SimpleSinglePartitionSource extends DataSourceV2 with BatchReadSupportProvider { + override def readSchema(): StructType = new StructType().add("i", "int").add("j", "int") - class ReadSupport extends SimpleReadSupport { - override def planInputPartitions(config: ScanConfig): Array[InputPartition] = { + override def createReaderFactory(): PartitionReaderFactory = SimpleReaderFactory +} + +class SimpleSinglePartitionSource extends TableProvider { + + class MyScanBuilder extends SimpleScanBuilder { + override def planInputPartitions(): Array[InputPartition] = { Array(RangeInputPartition(0, 5)) } } - override def createBatchReadSupport(options: DataSourceOptions): BatchReadSupport = { - new ReadSupport + 
override def getTable(options: DataSourceOptions): Table = new SimpleBatchTable { + override def newScanBuilder(options: DataSourceOptions): ScanBuilder = { + new MyScanBuilder() + } } } // This class is used by pyspark tests. If this class is modified/moved, make sure pyspark // tests still pass. -class SimpleDataSourceV2 extends DataSourceV2 with BatchReadSupportProvider { +class SimpleDataSourceV2 extends TableProvider { - class ReadSupport extends SimpleReadSupport { - override def planInputPartitions(config: ScanConfig): Array[InputPartition] = { + class MyScanBuilder extends SimpleScanBuilder { + override def planInputPartitions(): Array[InputPartition] = { Array(RangeInputPartition(0, 5), RangeInputPartition(5, 10)) } } - override def createBatchReadSupport(options: DataSourceOptions): BatchReadSupport = { - new ReadSupport + override def getTable(options: DataSourceOptions): Table = new SimpleBatchTable { + override def newScanBuilder(options: DataSourceOptions): ScanBuilder = { + new MyScanBuilder() + } } } -class AdvancedDataSourceV2 extends DataSourceV2 with BatchReadSupportProvider { - - class ReadSupport extends SimpleReadSupport { - override def newScanConfigBuilder(): ScanConfigBuilder = new AdvancedScanConfigBuilder() +class AdvancedDataSourceV2 extends TableProvider { - override def planInputPartitions(config: ScanConfig): Array[InputPartition] = { - val filters = config.asInstanceOf[AdvancedScanConfigBuilder].filters - - val lowerBound = filters.collectFirst { - case GreaterThan("i", v: Int) => v - } - - val res = scala.collection.mutable.ArrayBuffer.empty[InputPartition] - - if (lowerBound.isEmpty) { - res.append(RangeInputPartition(0, 5)) - res.append(RangeInputPartition(5, 10)) - } else if (lowerBound.get < 4) { - res.append(RangeInputPartition(lowerBound.get + 1, 5)) - res.append(RangeInputPartition(5, 10)) - } else if (lowerBound.get < 9) { - res.append(RangeInputPartition(lowerBound.get + 1, 10)) - } - - res.toArray - } - - override def createReaderFactory(config: ScanConfig): PartitionReaderFactory = { - val requiredSchema = config.asInstanceOf[AdvancedScanConfigBuilder].requiredSchema - new AdvancedReaderFactory(requiredSchema) + override def getTable(options: DataSourceOptions): Table = new SimpleBatchTable { + override def newScanBuilder(options: DataSourceOptions): ScanBuilder = { + new AdvancedScanBuilder() } } - - override def createBatchReadSupport(options: DataSourceOptions): BatchReadSupport = { - new ReadSupport - } } -class AdvancedScanConfigBuilder extends ScanConfigBuilder with ScanConfig - with SupportsPushDownRequiredColumns with SupportsPushDownFilters { +class AdvancedScanBuilder extends ScanBuilder + with Scan with SupportsPushDownFilters with SupportsPushDownRequiredColumns { var requiredSchema = new StructType().add("i", "int").add("j", "int") var filters = Array.empty[Filter] @@ -498,10 +474,40 @@ class AdvancedScanConfigBuilder extends ScanConfigBuilder with ScanConfig override def pushedFilters(): Array[Filter] = filters - override def build(): ScanConfig = this + override def build(): Scan = this + + override def toBatch: Batch = new AdvancedBatch(filters, requiredSchema) +} + +class AdvancedBatch(val filters: Array[Filter], val requiredSchema: StructType) extends Batch { + + override def planInputPartitions(): Array[InputPartition] = { + val lowerBound = filters.collectFirst { + case GreaterThan("i", v: Int) => v + } + + val res = scala.collection.mutable.ArrayBuffer.empty[InputPartition] + + if (lowerBound.isEmpty) { + 
res.append(RangeInputPartition(0, 5)) + res.append(RangeInputPartition(5, 10)) + } else if (lowerBound.get < 4) { + res.append(RangeInputPartition(lowerBound.get + 1, 5)) + res.append(RangeInputPartition(5, 10)) + } else if (lowerBound.get < 9) { + res.append(RangeInputPartition(lowerBound.get + 1, 10)) + } + + res.toArray + } + + override def createReaderFactory(): PartitionReaderFactory = { + new AdvancedReaderFactory(requiredSchema) + } } class AdvancedReaderFactory(requiredSchema: StructType) extends PartitionReaderFactory { + override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { val RangeInputPartition(start, end) = partition new PartitionReader[InternalRow] { @@ -526,39 +532,47 @@ class AdvancedReaderFactory(requiredSchema: StructType) extends PartitionReaderF } -class SchemaRequiredDataSource extends DataSourceV2 with BatchReadSupportProvider { +class SchemaRequiredDataSource extends TableProvider { - class ReadSupport(val schema: StructType) extends SimpleReadSupport { - override def fullSchema(): StructType = schema + class MyScanBuilder(schema: StructType) extends SimpleScanBuilder { + override def planInputPartitions(): Array[InputPartition] = Array.empty - override def planInputPartitions(config: ScanConfig): Array[InputPartition] = - Array.empty + override def readSchema(): StructType = schema } - override def createBatchReadSupport(options: DataSourceOptions): BatchReadSupport = { + override def getTable(options: DataSourceOptions): Table = { throw new IllegalArgumentException("requires a user-supplied schema") } - override def createBatchReadSupport( - schema: StructType, options: DataSourceOptions): BatchReadSupport = { - new ReadSupport(schema) + override def getTable(options: DataSourceOptions, schema: StructType): Table = { + val userGivenSchema = schema + new SimpleBatchTable { + override def schema(): StructType = userGivenSchema + + override def newScanBuilder(options: DataSourceOptions): ScanBuilder = { + new MyScanBuilder(userGivenSchema) + } + } } } -class ColumnarDataSourceV2 extends DataSourceV2 with BatchReadSupportProvider { +class ColumnarDataSourceV2 extends TableProvider { - class ReadSupport extends SimpleReadSupport { - override def planInputPartitions(config: ScanConfig): Array[InputPartition] = { + class MyScanBuilder extends SimpleScanBuilder { + + override def planInputPartitions(): Array[InputPartition] = { Array(RangeInputPartition(0, 50), RangeInputPartition(50, 90)) } - override def createReaderFactory(config: ScanConfig): PartitionReaderFactory = { + override def createReaderFactory(): PartitionReaderFactory = { ColumnarReaderFactory } } - override def createBatchReadSupport(options: DataSourceOptions): BatchReadSupport = { - new ReadSupport + override def getTable(options: DataSourceOptions): Table = new SimpleBatchTable { + override def newScanBuilder(options: DataSourceOptions): ScanBuilder = { + new MyScanBuilder() + } } } @@ -608,21 +622,29 @@ object ColumnarReaderFactory extends PartitionReaderFactory { } -class PartitionAwareDataSource extends DataSourceV2 with BatchReadSupportProvider { +class PartitionAwareDataSource extends TableProvider { + + class MyScanBuilder extends SimpleScanBuilder + with SupportsReportPartitioning{ - class ReadSupport extends SimpleReadSupport with SupportsReportPartitioning { - override def planInputPartitions(config: ScanConfig): Array[InputPartition] = { + override def planInputPartitions(): Array[InputPartition] = { // Note that we don't have same value of column `a` across 
partitions. Array( SpecificInputPartition(Array(1, 1, 3), Array(4, 4, 6)), SpecificInputPartition(Array(2, 4, 4), Array(6, 2, 2))) } - override def createReaderFactory(config: ScanConfig): PartitionReaderFactory = { + override def createReaderFactory(): PartitionReaderFactory = { SpecificReaderFactory } - override def outputPartitioning(config: ScanConfig): Partitioning = new MyPartitioning + override def outputPartitioning(): Partitioning = new MyPartitioning + } + + override def getTable(options: DataSourceOptions): Table = new SimpleBatchTable { + override def newScanBuilder(options: DataSourceOptions): ScanBuilder = { + new MyScanBuilder() + } } class MyPartitioning extends Partitioning { @@ -633,10 +655,6 @@ class PartitionAwareDataSource extends DataSourceV2 with BatchReadSupportProvide case _ => false } } - - override def createBatchReadSupport(options: DataSourceOptions): BatchReadSupport = { - new ReadSupport - } } case class SpecificInputPartition(i: Array[Int], j: Array[Int]) extends InputPartition @@ -662,7 +680,7 @@ object SpecificReaderFactory extends PartitionReaderFactory { class SchemaReadAttemptException(m: String) extends RuntimeException(m) class SimpleWriteOnlyDataSource extends SimpleWritableDataSource { - override def fullSchema(): StructType = { + override def writeSchema(): StructType = { // This is a bit hacky since this source implements read support but throws // during schema retrieval. Might have to rewrite but it's done // such so for minimised changes. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala index a7dfc2d1deac..82bb4fa33a3a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.sources.v2.reader._ import org.apache.spark.sql.sources.v2.writer._ -import org.apache.spark.sql.types.{DataType, StructType} +import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration /** @@ -39,19 +39,16 @@ import org.apache.spark.util.SerializableConfiguration * Each job moves files from `target/_temporary/queryId/` to `target`. 
*/ class SimpleWritableDataSource extends DataSourceV2 - with BatchReadSupportProvider + with TableProvider with BatchWriteSupportProvider with SessionConfigSupport { - protected def fullSchema(): StructType = new StructType().add("i", "long").add("j", "long") + protected def writeSchema(): StructType = new StructType().add("i", "long").add("j", "long") override def keyPrefix: String = "simpleWritableDataSource" - class ReadSupport(path: String, conf: Configuration) extends SimpleReadSupport { - - override def fullSchema(): StructType = SimpleWritableDataSource.this.fullSchema() - - override def planInputPartitions(config: ScanConfig): Array[InputPartition] = { + class MyScanBuilder(path: String, conf: Configuration) extends SimpleScanBuilder { + override def planInputPartitions(): Array[InputPartition] = { val dataPath = new Path(path) val fs = dataPath.getFileSystem(conf) if (fs.exists(dataPath)) { @@ -66,10 +63,24 @@ class SimpleWritableDataSource extends DataSourceV2 } } - override def createReaderFactory(config: ScanConfig): PartitionReaderFactory = { + override def createReaderFactory(): PartitionReaderFactory = { val serializableConf = new SerializableConfiguration(conf) new CSVReaderFactory(serializableConf) } + + override def readSchema(): StructType = writeSchema + } + + override def getTable(options: DataSourceOptions): Table = { + val path = new Path(options.get("path").get()) + val conf = SparkContext.getActive.get.hadoopConfiguration + new SimpleBatchTable { + override def newScanBuilder(options: DataSourceOptions): ScanBuilder = { + new MyScanBuilder(path.toUri.toString, conf) + } + + override def schema(): StructType = writeSchema + } } class WritSupport(queryId: String, path: String, conf: Configuration) extends BatchWriteSupport { @@ -105,12 +116,6 @@ class SimpleWritableDataSource extends DataSourceV2 } } - override def createBatchReadSupport(options: DataSourceOptions): BatchReadSupport = { - val path = new Path(options.get("path").get()) - val conf = SparkContext.getActive.get.hadoopConfiguration - new ReadSupport(path.toUri.toString, conf) - } - override def createBatchWriteSupport( queryId: String, schema: StructType, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 93eae292acc2..756092fc7ff5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.streaming.continuous import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart} import org.apache.spark.sql._ -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExec +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2StreamingScanExec import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous._ import org.apache.spark.sql.execution.streaming.sources.ContinuousMemoryStream @@ -41,7 +41,7 @@ class ContinuousSuiteBase extends StreamTest { case s: ContinuousExecution => assert(numTriggers >= 2, "must wait for at least 2 triggers to ensure query is initialized") val reader = s.lastExecution.executedPlan.collectFirst { - case DataSourceV2ScanExec(_, _, _, _, r: RateStreamContinuousReadSupport, _) => r + case DataSourceV2StreamingScanExec(_, _, _, _, 
r: RateStreamContinuousReadSupport, _) => r }.get val deltaMs = numTriggers * 1000 + 300 From c3f27b2437497396913fdec96f085c3626ef4e59 Mon Sep 17 00:00:00 2001 From: Keiji Yoshida Date: Fri, 30 Nov 2018 09:03:46 -0600 Subject: [PATCH 0031/1072] [MINOR][DOCS] Fix typos ## What changes were proposed in this pull request? Fix Typos. This PR is the complete version of https://github.com/apache/spark/pull/23145. ## How was this patch tested? NA Closes #23185 from kjmrknsn/docUpdate. Authored-by: Keiji Yoshida Signed-off-by: Sean Owen --- docs/configuration.md | 2 +- docs/graphx-programming-guide.md | 4 ++-- docs/ml-datasource.md | 2 +- docs/ml-features.md | 8 ++++---- docs/ml-pipeline.md | 2 +- docs/mllib-linear-methods.md | 4 ++-- docs/security.md | 2 +- docs/sparkr.md | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 04210d855b11..8914bd0310f9 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -498,7 +498,7 @@ Apart from these, the following properties are also available, and may be useful
    diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md index cb96fd773aa5..ecedeaf958f1 100644 --- a/docs/graphx-programming-guide.md +++ b/docs/graphx-programming-guide.md @@ -522,7 +522,7 @@ val joinedGraph = graph.joinVertices(uniqueCosts, A key step in many graph analytics tasks is aggregating information about the neighborhood of each vertex. -For example, we might want to know the number of followers each user has or the average age of the +For example, we might want to know the number of followers each user has or the average age of the followers of each user. Many iterative graph algorithms (e.g., PageRank, Shortest Path, and connected components) repeatedly aggregate properties of neighboring vertices (e.g., current PageRank Value, shortest path to the source, and smallest reachable vertex id). @@ -700,7 +700,7 @@ a new value for the vertex property, and then send messages to neighboring verti super step. Unlike Pregel, messages are computed in parallel as a function of the edge triplet and the message computation has access to both the source and destination vertex attributes. Vertices that do not receive a message are skipped within a super -step. The Pregel operators terminates iteration and returns the final graph when there are no +step. The Pregel operator terminates iteration and returns the final graph when there are no messages remaining. > Note, unlike more standard Pregel implementations, vertices in GraphX can only send messages to diff --git a/docs/ml-datasource.md b/docs/ml-datasource.md index 15083326240a..35afaef5ad7f 100644 --- a/docs/ml-datasource.md +++ b/docs/ml-datasource.md @@ -5,7 +5,7 @@ displayTitle: Data sources --- In this section, we introduce how to use data source in ML to load data. -Beside some general data sources such as Parquet, CSV, JSON and JDBC, we also provide some specific data sources for ML. +Besides some general data sources such as Parquet, CSV, JSON and JDBC, we also provide some specific data sources for ML. **Table of Contents** diff --git a/docs/ml-features.md b/docs/ml-features.md index 83a211ce02e6..a140bc6e7a22 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -359,7 +359,7 @@ Assume that we have the following DataFrame with columns `id` and `raw`: ~~~~ id | raw ----|---------- - 0 | [I, saw, the, red, baloon] + 0 | [I, saw, the, red, balloon] 1 | [Mary, had, a, little, lamb] ~~~~ @@ -369,7 +369,7 @@ column, we should get the following: ~~~~ id | raw | filtered ----|-----------------------------|-------------------- - 0 | [I, saw, the, red, baloon] | [saw, red, baloon] + 0 | [I, saw, the, red, balloon] | [saw, red, balloon] 1 | [Mary, had, a, little, lamb]|[Mary, little, lamb] ~~~~ @@ -1302,7 +1302,7 @@ need to know vector size, can use that column as an input. To use `VectorSizeHint` a user must set the `inputCol` and `size` parameters. Applying this transformer to a dataframe produces a new dataframe with updated metadata for `inputCol` specifying the vector size. Downstream operations on the resulting dataframe can get this size using the -meatadata. +metadata. `VectorSizeHint` can also take an optional `handleInvalid` parameter which controls its behaviour when the vector column contains nulls or vectors of the wrong size. By default @@ -1310,7 +1310,7 @@ behaviour when the vector column contains nulls or vectors of the wrong size. 
By also be set to "skip", indicating that rows containing invalid values should be filtered out from the resulting dataframe, or "optimistic", indicating that the column should not be checked for invalid values and all rows should be kept. Note that the use of "optimistic" can cause the -resulting dataframe to be in an inconsistent state, me:aning the metadata for the column +resulting dataframe to be in an inconsistent state, meaning the metadata for the column `VectorSizeHint` was applied to does not match the contents of that column. Users should take care to avoid this kind of inconsistent state. diff --git a/docs/ml-pipeline.md b/docs/ml-pipeline.md index 8c01ccb94c75..0c9c998f6353 100644 --- a/docs/ml-pipeline.md +++ b/docs/ml-pipeline.md @@ -62,7 +62,7 @@ In addition to the types listed in the Spark SQL guide, `DataFrame` can use ML [ A `DataFrame` can be created either implicitly or explicitly from a regular `RDD`. See the code examples below and the [Spark SQL programming guide](sql-programming-guide.html) for examples. -Columns in a `DataFrame` are named. The code examples below use names such as "text," "features," and "label." +Columns in a `DataFrame` are named. The code examples below use names such as "text", "features", and "label". ## Pipeline components diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md index 73f6e206ca54..2879d884162a 100644 --- a/docs/mllib-linear-methods.md +++ b/docs/mllib-linear-methods.md @@ -272,7 +272,7 @@ In `spark.mllib`, the first class $0$ is chosen as the "pivot" class. See Section 4.4 of [The Elements of Statistical Learning](http://statweb.stanford.edu/~tibs/ElemStatLearn/) for references. -Here is an +Here is a [detailed mathematical derivation](http://www.slideshare.net/dbtsai/2014-0620-mlor-36132297). For multiclass classification problems, the algorithm will output a multinomial logistic regression @@ -350,7 +350,7 @@ known as the [mean squared error](http://en.wikipedia.org/wiki/Mean_squared_erro
    -The following example demonstrate how to load training data, parse it as an RDD of LabeledPoint. +The following example demonstrates how to load training data, parse it as an RDD of LabeledPoint. The example then uses LinearRegressionWithSGD to build a simple linear model to predict label values. We compute the mean squared error at the end to evaluate [goodness of fit](http://en.wikipedia.org/wiki/Goodness_of_fit). diff --git a/docs/security.md b/docs/security.md index 02d581c6dad9..be4834660fb7 100644 --- a/docs/security.md +++ b/docs/security.md @@ -337,7 +337,7 @@ Configuration for SSL is organized hierarchically. The user can configure the de which will be used for all the supported communication protocols unless they are overwritten by protocol-specific settings. This way the user can easily provide the common settings for all the protocols without disabling the ability to configure each one individually. The following table -describes the the SSL configuration namespaces: +describes the SSL configuration namespaces:
    Reuse Python worker or not. If yes, it will use a fixed number of Python workers, does not need to fork() a Python process for every task. It will be very useful - if there is large broadcast, then the broadcast will not be needed to transferred + if there is a large broadcast, then the broadcast will not need to be transferred from JVM to Python worker for every task.
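The property described above interacts directly with Python broadcast variables: when workers are reused, a large broadcast value only needs to reach each Python worker once instead of being re-shipped from the JVM for every task. A minimal PySpark sketch of that setup follows; it is illustrative only and not part of this patch, and the app name, master URL, and payload size are placeholder assumptions.

```python
from pyspark import SparkConf, SparkContext

# Placeholder settings; spark.python.worker.reuse normally defaults to "true",
# it is set explicitly here only to make the documented behavior visible.
conf = (SparkConf()
        .setAppName("python-worker-reuse-sketch")
        .setMaster("local[4]")
        .set("spark.python.worker.reuse", "true"))
sc = SparkContext(conf=conf)

# A sizable broadcast value: with worker reuse it is transferred to each
# Python worker once and then read from the worker-local copy by later tasks.
lookup = sc.broadcast({i: i * i for i in range(100000)})

total = (sc.parallelize(range(10000), 8)
           .map(lambda x: lookup.value.get(x, 0))
           .sum())
print(total)

sc.stop()
```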
    diff --git a/docs/sparkr.md b/docs/sparkr.md index 5972435a0e40..0057f05de0ff 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -296,7 +296,7 @@ head(agg(rollup(df, "cyl", "disp", "gear"), avg(df$mpg))) ### Operating on Columns -SparkR also provides a number of functions that can directly applied to columns for data processing and during aggregation. The example below shows the use of basic arithmetic functions. +SparkR also provides a number of functions that can be directly applied to columns for data processing and during aggregation. The example below shows the use of basic arithmetic functions.
    {% highlight r %} From 9b23be2e95fec756066ca0ed3188c3db2602b757 Mon Sep 17 00:00:00 2001 From: schintap Date: Fri, 30 Nov 2018 12:48:56 -0600 Subject: [PATCH 0032/1072] [SPARK-26201] Fix python broadcast with encryption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? Python with rpc and disk encryption enabled along with a python broadcast variable and just read the value back on the driver side the job failed with: Traceback (most recent call last): File "broadcast.py", line 37, in words_new.value File "/pyspark.zip/pyspark/broadcast.py", line 137, in value File "pyspark.zip/pyspark/broadcast.py", line 122, in load_from_path File "pyspark.zip/pyspark/broadcast.py", line 128, in load EOFError: Ran out of input To reproduce use configs: --conf spark.network.crypto.enabled=true --conf spark.io.encryption.enabled=true Code: words_new = sc.broadcast(["scala", "java", "hadoop", "spark", "akka"]) words_new.value print(words_new.value) ## How was this patch tested? words_new = sc.broadcast([“scala”, “java”, “hadoop”, “spark”, “akka”]) textFile = sc.textFile(“README.md”) wordCounts = textFile.flatMap(lambda line: line.split()).map(lambda word: (word + words_new.value[1], 1)).reduceByKey(lambda a, b: a+b) count = wordCounts.count() print(count) words_new.value print(words_new.value) Closes #23166 from redsanket/SPARK-26201. Authored-by: schintap Signed-off-by: Thomas Graves --- .../apache/spark/api/python/PythonRDD.scala | 29 ++++++++++++++++--- python/pyspark/broadcast.py | 21 ++++++++++---- python/pyspark/tests/test_broadcast.py | 15 ++++++++++ 3 files changed, 56 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 8b5a7a9aefea..5ed5070558af 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -660,6 +660,7 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial with Logging { private var encryptionServer: PythonServer[Unit] = null + private var decryptionServer: PythonServer[Unit] = null /** * Read data from disks, then copy it to `out` @@ -708,16 +709,36 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial override def handleConnection(sock: Socket): Unit = { val env = SparkEnv.get val in = sock.getInputStream() - val dir = new File(Utils.getLocalDir(env.conf)) - val file = File.createTempFile("broadcast", "", dir) - path = file.getAbsolutePath - val out = env.serializerManager.wrapForEncryption(new FileOutputStream(path)) + val abspath = new File(path).getAbsolutePath + val out = env.serializerManager.wrapForEncryption(new FileOutputStream(abspath)) DechunkedInputStream.dechunkAndCopyToOutput(in, out) } } Array(encryptionServer.port, encryptionServer.secret) } + def setupDecryptionServer(): Array[Any] = { + decryptionServer = new PythonServer[Unit]("broadcast-decrypt-server-for-driver") { + override def handleConnection(sock: Socket): Unit = { + val out = new DataOutputStream(new BufferedOutputStream(sock.getOutputStream())) + Utils.tryWithSafeFinally { + val in = SparkEnv.get.serializerManager.wrapForEncryption(new FileInputStream(path)) + Utils.tryWithSafeFinally { + Utils.copyStream(in, out, false) + } { + in.close() + } + out.flush() + } { + JavaUtils.closeQuietly(out) + } + } + } + Array(decryptionServer.port, 
decryptionServer.secret) + } + + def waitTillBroadcastDataSent(): Unit = decryptionServer.getResult() + def waitTillDataReceived(): Unit = encryptionServer.getResult() } // scalastyle:on no.finalize diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py index 1c7f2a7418df..29358b5740e5 100644 --- a/python/pyspark/broadcast.py +++ b/python/pyspark/broadcast.py @@ -77,11 +77,12 @@ def __init__(self, sc=None, value=None, pickle_registry=None, path=None, # we're on the driver. We want the pickled data to end up in a file (maybe encrypted) f = NamedTemporaryFile(delete=False, dir=sc._temp_dir) self._path = f.name - python_broadcast = sc._jvm.PythonRDD.setupBroadcast(self._path) + self._sc = sc + self._python_broadcast = sc._jvm.PythonRDD.setupBroadcast(self._path) if sc._encryption_enabled: # with encryption, we ask the jvm to do the encryption for us, we send it data # over a socket - port, auth_secret = python_broadcast.setupEncryptionServer() + port, auth_secret = self._python_broadcast.setupEncryptionServer() (encryption_sock_file, _) = local_connect_and_auth(port, auth_secret) broadcast_out = ChunkedStream(encryption_sock_file, 8192) else: @@ -89,12 +90,14 @@ def __init__(self, sc=None, value=None, pickle_registry=None, path=None, broadcast_out = f self.dump(value, broadcast_out) if sc._encryption_enabled: - python_broadcast.waitTillDataReceived() - self._jbroadcast = sc._jsc.broadcast(python_broadcast) + self._python_broadcast.waitTillDataReceived() + self._jbroadcast = sc._jsc.broadcast(self._python_broadcast) self._pickle_registry = pickle_registry else: # we're on an executor self._jbroadcast = None + self._sc = None + self._python_broadcast = None if sock_file is not None: # the jvm is doing decryption for us. Read the value # immediately from the sock_file @@ -134,7 +137,15 @@ def value(self): """ Return the broadcasted value """ if not hasattr(self, "_value") and self._path is not None: - self._value = self.load_from_path(self._path) + # we only need to decrypt it here when encryption is enabled and + # if its on the driver, since executor decryption is handled already + if self._sc is not None and self._sc._encryption_enabled: + port, auth_secret = self._python_broadcast.setupDecryptionServer() + (decrypted_sock_file, _) = local_connect_and_auth(port, auth_secret) + self._python_broadcast.waitTillBroadcastDataSent() + return self.load(decrypted_sock_file) + else: + self._value = self.load_from_path(self._path) return self._value def unpersist(self, blocking=False): diff --git a/python/pyspark/tests/test_broadcast.py b/python/pyspark/tests/test_broadcast.py index a98626e8f4bc..11d31d24bb01 100644 --- a/python/pyspark/tests/test_broadcast.py +++ b/python/pyspark/tests/test_broadcast.py @@ -67,6 +67,21 @@ def test_broadcast_with_encryption(self): def test_broadcast_no_encryption(self): self._test_multiple_broadcasts() + def _test_broadcast_on_driver(self, *extra_confs): + conf = SparkConf() + for key, value in extra_confs: + conf.set(key, value) + conf.setMaster("local-cluster[2,1,1024]") + self.sc = SparkContext(conf=conf) + bs = self.sc.broadcast(value=5) + self.assertEqual(5, bs.value) + + def test_broadcast_value_driver_no_encryption(self): + self._test_broadcast_on_driver() + + def test_broadcast_value_driver_encryption(self): + self._test_broadcast_on_driver(("spark.io.encryption.enabled", "true")) + class BroadcastFrameProtocolTest(unittest.TestCase): From 36edbac1c8337a4719f90e4abd58d38738b2e1fb Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Fri, 30 Nov 2018 
14:23:18 -0800 Subject: [PATCH 0033/1072] [SPARK-26226][SQL] Update query tracker to report timeline for phases ## What changes were proposed in this pull request? This patch changes the query plan tracker added earlier to report phase timeline, rather than just a duration for each phase. This way, we can easily find time that's unaccounted for. ## How was this patch tested? Updated test cases to reflect that. Closes #23183 from rxin/SPARK-26226. Authored-by: Reynold Xin Signed-off-by: gatorsmile --- .../sql/catalyst/QueryPlanningTracker.scala | 45 +++++++++++++++---- .../catalyst/QueryPlanningTrackerSuite.scala | 18 +++++--- .../org/apache/spark/sql/SparkSession.scala | 2 +- .../spark/sql/execution/QueryExecution.scala | 8 ++-- .../QueryPlanningTrackerEndToEndSuite.scala | 15 +------ 5 files changed, 55 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/QueryPlanningTracker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/QueryPlanningTracker.scala index 244081cd160b..cd75407c7ee7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/QueryPlanningTracker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/QueryPlanningTracker.scala @@ -41,6 +41,13 @@ object QueryPlanningTracker { val OPTIMIZATION = "optimization" val PLANNING = "planning" + /** + * Summary for a rule. + * @param totalTimeNs total amount of time, in nanosecs, spent in this rule. + * @param numInvocations number of times the rule has been invoked. + * @param numEffectiveInvocations number of times the rule has been invoked and + * resulted in a plan change. + */ class RuleSummary( var totalTimeNs: Long, var numInvocations: Long, var numEffectiveInvocations: Long) { @@ -51,6 +58,18 @@ object QueryPlanningTracker { } } + /** + * Summary of a phase, with start time and end time so we can construct a timeline. + */ + class PhaseSummary(val startTimeMs: Long, val endTimeMs: Long) { + + def durationMs: Long = endTimeMs - startTimeMs + + override def toString: String = { + s"PhaseSummary($startTimeMs, $endTimeMs)" + } + } + /** * A thread local variable to implicitly pass the tracker around. This assumes the query planner * is single-threaded, and avoids passing the same tracker context in every function call. @@ -79,15 +98,25 @@ class QueryPlanningTracker { // Use a Java HashMap for less overhead. private val rulesMap = new java.util.HashMap[String, RuleSummary] - // From a phase to time in ns. - private val phaseToTimeNs = new java.util.HashMap[String, Long] + // From a phase to its start time and end time, in ms. + private val phasesMap = new java.util.HashMap[String, PhaseSummary] - /** Measure the runtime of function f, and add it to the time for the specified phase. */ - def measureTime[T](phase: String)(f: => T): T = { - val startTime = System.nanoTime() + /** + * Measure the start and end time of a phase. Note that if this function is called multiple + * times for the same phase, the recorded start time will be the start time of the first call, + * and the recorded end time will be the end time of the last call. 
+ */ + def measurePhase[T](phase: String)(f: => T): T = { + val startTime = System.currentTimeMillis() val ret = f - val timeTaken = System.nanoTime() - startTime - phaseToTimeNs.put(phase, phaseToTimeNs.getOrDefault(phase, 0) + timeTaken) + val endTime = System.currentTimeMillis + + if (phasesMap.containsKey(phase)) { + val oldSummary = phasesMap.get(phase) + phasesMap.put(phase, new PhaseSummary(oldSummary.startTimeMs, endTime)) + } else { + phasesMap.put(phase, new PhaseSummary(startTime, endTime)) + } ret } @@ -114,7 +143,7 @@ class QueryPlanningTracker { def rules: Map[String, RuleSummary] = rulesMap.asScala.toMap - def phases: Map[String, Long] = phaseToTimeNs.asScala.toMap + def phases: Map[String, PhaseSummary] = phasesMap.asScala.toMap /** * Returns the top k most expensive rules (as measured by time). If k is larger than the rules diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/QueryPlanningTrackerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/QueryPlanningTrackerSuite.scala index 120b284a7785..9593a720e424 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/QueryPlanningTrackerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/QueryPlanningTrackerSuite.scala @@ -23,19 +23,23 @@ class QueryPlanningTrackerSuite extends SparkFunSuite { test("phases") { val t = new QueryPlanningTracker - t.measureTime("p1") { + t.measurePhase("p1") { Thread.sleep(1) } - assert(t.phases("p1") > 0) + assert(t.phases("p1").durationMs > 0) assert(!t.phases.contains("p2")) + } - val old = t.phases("p1") + test("multiple measurePhase call") { + val t = new QueryPlanningTracker + t.measurePhase("p1") { Thread.sleep(1) } + val s1 = t.phases("p1") + assert(s1.durationMs > 0) - t.measureTime("p1") { - Thread.sleep(1) - } - assert(t.phases("p1") > old) + t.measurePhase("p1") { Thread.sleep(1) } + val s2 = t.phases("p1") + assert(s2.durationMs > s1.durationMs) } test("rules") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 739c6b54b4cb..26272c390668 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -649,7 +649,7 @@ class SparkSession private( */ def sql(sqlText: String): DataFrame = { val tracker = new QueryPlanningTracker - val plan = tracker.measureTime(QueryPlanningTracker.PARSING) { + val plan = tracker.measurePhase(QueryPlanningTracker.PARSING) { sessionState.sqlParser.parsePlan(sqlText) } Dataset.ofRows(self, plan, tracker) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index cfb5e43207b0..eef5a3f899f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -60,7 +60,7 @@ class QueryExecution( } } - lazy val analyzed: LogicalPlan = tracker.measureTime(QueryPlanningTracker.ANALYSIS) { + lazy val analyzed: LogicalPlan = tracker.measurePhase(QueryPlanningTracker.ANALYSIS) { SparkSession.setActiveSession(sparkSession) sparkSession.sessionState.analyzer.executeAndCheck(logical, tracker) } @@ -71,11 +71,11 @@ class QueryExecution( sparkSession.sharedState.cacheManager.useCachedData(analyzed) } - lazy val optimizedPlan: LogicalPlan = tracker.measureTime(QueryPlanningTracker.OPTIMIZATION) { 
+ lazy val optimizedPlan: LogicalPlan = tracker.measurePhase(QueryPlanningTracker.OPTIMIZATION) { sparkSession.sessionState.optimizer.executeAndTrack(withCachedData, tracker) } - lazy val sparkPlan: SparkPlan = tracker.measureTime(QueryPlanningTracker.PLANNING) { + lazy val sparkPlan: SparkPlan = tracker.measurePhase(QueryPlanningTracker.PLANNING) { SparkSession.setActiveSession(sparkSession) // TODO: We use next(), i.e. take the first plan returned by the planner, here for now, // but we will implement to choose the best plan. @@ -84,7 +84,7 @@ class QueryExecution( // executedPlan should not be used to initialize any SparkPlan. It should be // only used for execution. - lazy val executedPlan: SparkPlan = tracker.measureTime(QueryPlanningTracker.PLANNING) { + lazy val executedPlan: SparkPlan = tracker.measurePhase(QueryPlanningTracker.PLANNING) { prepareForExecution(sparkPlan) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala index 0af4c85400e9..e42177c156ee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala @@ -25,12 +25,7 @@ class QueryPlanningTrackerEndToEndSuite extends SharedSQLContext { val df = spark.range(1000).selectExpr("count(*)") df.collect() val tracker = df.queryExecution.tracker - - assert(tracker.phases.size == 3) - assert(tracker.phases("analysis") > 0) - assert(tracker.phases("optimization") > 0) - assert(tracker.phases("planning") > 0) - + assert(tracker.phases.keySet == Set("analysis", "optimization", "planning")) assert(tracker.rules.nonEmpty) } @@ -39,13 +34,7 @@ class QueryPlanningTrackerEndToEndSuite extends SharedSQLContext { df.collect() val tracker = df.queryExecution.tracker - - assert(tracker.phases.size == 4) - assert(tracker.phases("parsing") > 0) - assert(tracker.phases("analysis") > 0) - assert(tracker.phases("optimization") > 0) - assert(tracker.phases("planning") > 0) - + assert(tracker.phases.keySet == Set("parsing", "analysis", "optimization", "planning")) assert(tracker.rules.nonEmpty) } From 8856e9f6a3d5c019fcae45dbbdfa9128cd700e19 Mon Sep 17 00:00:00 2001 From: Shahid Date: Fri, 30 Nov 2018 15:20:05 -0800 Subject: [PATCH 0034/1072] [SPARK-26219][CORE] Executor summary should get updated for failure jobs in the history server UI The root cause of the problem is, whenever the taskEnd event comes after stageCompleted event, execSummary is updating only for live UI. we need to update for history UI too. To see the previous discussion, refer: PR for https://github.com/apache/spark/pull/23038, https://issues.apache.org/jira/browse/SPARK-26100. Added UT. Manually verified Test step to reproduce: ``` bin/spark-shell --master yarn --conf spark.executor.instances=3 sc.parallelize(1 to 10000, 10).map{ x => throw new RuntimeException("Bad executor")}.collect() ``` Open Executors page from the History UI Before patch: ![screenshot from 2018-11-29 22-13-34](https://user-images.githubusercontent.com/23054875/49246338-a21ead00-f43a-11e8-8214-f1020420be52.png) After patch: ![screenshot from 2018-11-30 00-54-49](https://user-images.githubusercontent.com/23054875/49246353-aa76e800-f43a-11e8-98ef-7faecaa7a50e.png) Closes #23181 from shahidki31/executorUpdate. 
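The executor summary that this change keeps current for the history server is also visible outside the UI through the monitoring REST API, which can be handy when asserting the counts above from a script. A rough sketch, assuming a history server listening on localhost:18080 and a placeholder application id (both values are assumptions, not taken from this patch; applications with multiple attempts need the attempt id in the path):

```python
import json
from urllib.request import urlopen

# Placeholders: point these at your history server and a finished application.
history_server = "http://localhost:18080"
app_id = "app-20181130000000-0000"

# "allexecutors" also lists executors that have since been removed, which is
# the interesting case for a job that failed with lost executors.
url = history_server + "/api/v1/applications/" + app_id + "/allexecutors"
with urlopen(url) as resp:
    executors = json.load(resp)

for e in executors:
    # These fields mirror the columns shown on the Executors page.
    print(e["id"], "failed:", e["failedTasks"],
          "completed:", e["completedTasks"], "active:", e["activeTasks"])
```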
Authored-by: Shahid Signed-off-by: Marcelo Vanzin --- .../spark/status/AppStatusListener.scala | 19 ++-- .../spark/status/AppStatusListenerSuite.scala | 92 +++++++++++-------- 2 files changed, 64 insertions(+), 47 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 8e845573a903..bd3f58b6182c 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -641,9 +641,14 @@ private[spark] class AppStatusListener( } } - // Force an update on live applications when the number of active tasks reaches 0. This is - // checked in some tests (e.g. SQLTestUtilsBase) so it needs to be reliably up to date. - conditionalLiveUpdate(exec, now, exec.activeTasks == 0) + // Force an update on both live and history applications when the number of active tasks + // reaches 0. This is checked in some tests (e.g. SQLTestUtilsBase) so it needs to be + // reliably up to date. + if (exec.activeTasks == 0) { + update(exec, now) + } else { + maybeUpdate(exec, now) + } } } @@ -1024,14 +1029,6 @@ private[spark] class AppStatusListener( } } - private def conditionalLiveUpdate(entity: LiveEntity, now: Long, condition: Boolean): Unit = { - if (condition) { - liveUpdate(entity, now) - } else { - maybeUpdate(entity, now) - } - } - private def cleanupExecutors(count: Long): Unit = { // Because the limit is on the number of *dead* executors, we need to calculate whether // there are actually enough dead executors to be deleted. diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala index 7860a0df4bb2..61fec8c1d0e4 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala @@ -1273,48 +1273,68 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(allJobs.head.numFailedStages == 1) } - test("SPARK-25451: total tasks in the executor summary should match total stage tasks") { - val testConf = conf.clone.set(LIVE_ENTITY_UPDATE_PERIOD, Long.MaxValue) + Seq(true, false).foreach { live => + test(s"Total tasks in the executor summary should match total stage tasks (live = $live)") { - val listener = new AppStatusListener(store, testConf, true) + val testConf = if (live) { + conf.clone().set(LIVE_ENTITY_UPDATE_PERIOD, Long.MaxValue) + } else { + conf.clone().set(LIVE_ENTITY_UPDATE_PERIOD, -1L) + } - val stage = new StageInfo(1, 0, "stage", 4, Nil, Nil, "details") - listener.onJobStart(SparkListenerJobStart(1, time, Seq(stage), null)) - listener.onStageSubmitted(SparkListenerStageSubmitted(stage, new Properties())) + val listener = new AppStatusListener(store, testConf, live) - val tasks = createTasks(4, Array("1", "2")) - tasks.foreach { task => - listener.onTaskStart(SparkListenerTaskStart(stage.stageId, stage.attemptNumber, task)) - } + listener.onExecutorAdded(createExecutorAddedEvent(1)) + listener.onExecutorAdded(createExecutorAddedEvent(2)) + val stage = new StageInfo(1, 0, "stage", 4, Nil, Nil, "details") + listener.onJobStart(SparkListenerJobStart(1, time, Seq(stage), null)) + listener.onStageSubmitted(SparkListenerStageSubmitted(stage, new Properties())) - time += 1 - tasks(0).markFinished(TaskState.FINISHED, time) - listener.onTaskEnd(SparkListenerTaskEnd(stage.stageId, 
stage.attemptNumber, "taskType", - Success, tasks(0), null)) - time += 1 - tasks(1).markFinished(TaskState.FINISHED, time) - listener.onTaskEnd(SparkListenerTaskEnd(stage.stageId, stage.attemptNumber, "taskType", - Success, tasks(1), null)) + val tasks = createTasks(4, Array("1", "2")) + tasks.foreach { task => + listener.onTaskStart(SparkListenerTaskStart(stage.stageId, stage.attemptNumber, task)) + } - stage.failureReason = Some("Failed") - listener.onStageCompleted(SparkListenerStageCompleted(stage)) - time += 1 - listener.onJobEnd(SparkListenerJobEnd(1, time, JobFailed(new RuntimeException("Bad Executor")))) + time += 1 + tasks(0).markFinished(TaskState.FINISHED, time) + listener.onTaskEnd(SparkListenerTaskEnd(stage.stageId, stage.attemptNumber, "taskType", + Success, tasks(0), null)) + time += 1 + tasks(1).markFinished(TaskState.FINISHED, time) + listener.onTaskEnd(SparkListenerTaskEnd(stage.stageId, stage.attemptNumber, "taskType", + Success, tasks(1), null)) - time += 1 - tasks(2).markFinished(TaskState.FAILED, time) - listener.onTaskEnd(SparkListenerTaskEnd(stage.stageId, stage.attemptNumber, "taskType", - ExecutorLostFailure("1", true, Some("Lost executor")), tasks(2), null)) - time += 1 - tasks(3).markFinished(TaskState.FAILED, time) - listener.onTaskEnd(SparkListenerTaskEnd(stage.stageId, stage.attemptNumber, "taskType", - ExecutorLostFailure("2", true, Some("Lost executor")), tasks(3), null)) - - val esummary = store.view(classOf[ExecutorStageSummaryWrapper]).asScala.map(_.info) - esummary.foreach { execSummary => - assert(execSummary.failedTasks === 1) - assert(execSummary.succeededTasks === 1) - assert(execSummary.killedTasks === 0) + stage.failureReason = Some("Failed") + listener.onStageCompleted(SparkListenerStageCompleted(stage)) + time += 1 + listener.onJobEnd(SparkListenerJobEnd(1, time, JobFailed( + new RuntimeException("Bad Executor")))) + + time += 1 + tasks(2).markFinished(TaskState.FAILED, time) + listener.onTaskEnd(SparkListenerTaskEnd(stage.stageId, stage.attemptNumber, "taskType", + ExecutorLostFailure("1", true, Some("Lost executor")), tasks(2), null)) + time += 1 + tasks(3).markFinished(TaskState.FAILED, time) + listener.onTaskEnd(SparkListenerTaskEnd(stage.stageId, stage.attemptNumber, "taskType", + ExecutorLostFailure("2", true, Some("Lost executor")), tasks(3), null)) + + val esummary = store.view(classOf[ExecutorStageSummaryWrapper]).asScala.map(_.info) + esummary.foreach { execSummary => + assert(execSummary.failedTasks === 1) + assert(execSummary.succeededTasks === 1) + assert(execSummary.killedTasks === 0) + } + + val allExecutorSummary = store.view(classOf[ExecutorSummaryWrapper]).asScala.map(_.info) + assert(allExecutorSummary.size === 2) + allExecutorSummary.foreach { allExecSummary => + assert(allExecSummary.failedTasks === 1) + assert(allExecSummary.activeTasks === 0) + assert(allExecSummary.completedTasks === 1) + } + store.delete(classOf[ExecutorSummaryWrapper], "1") + store.delete(classOf[ExecutorSummaryWrapper], "2") } } From 6be272b75b4ae3149869e19df193675cc4117763 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Fri, 30 Nov 2018 16:23:37 -0800 Subject: [PATCH 0035/1072] [SPARK-25876][K8S] Simplify kubernetes configuration types. There are a few issues with the current configuration types used in the kubernetes backend: - they use type parameters for role-specific specialization, which makes type signatures really noisy throughout the code base. 
- they break encapsulation by forcing the code that creates the config object to remove the configuration from SparkConf before creating the k8s-specific wrapper. - they don't provide an easy way for tests to have default values for fields they do not use. This change fixes those problems by: - creating a base config type with role-specific specialization using inheritance - encapsulating the logic of parsing SparkConf into k8s-specific views inside the k8s config classes - providing some helper code for tests to easily override just the part of the configs they want. Most of the change relates to the above, especially cleaning up the tests. While doing that, I also made some smaller changes elsewhere: - removed unnecessary type parameters in KubernetesVolumeSpec - simplified the error detection logic in KubernetesVolumeUtils; all the call sites would just throw the first exception collected by that class, since they all called "get" on the "Try" object. Now the unnecessary wrapping is gone and the exception is just thrown where it occurs. - removed a lot of unnecessary mocking from tests. - changed the kerberos-related code so that less logic needs to live in the driver builder. In spirit it should be part of the upcoming work in this series of cleanups, but it made parts of this change simpler. Tested with existing unit tests and integration tests. Author: Marcelo Vanzin Closes #22959 from vanzin/SPARK-25876. --- .../org/apache/spark/deploy/k8s/Config.scala | 17 +- .../spark/deploy/k8s/KubernetesConf.scala | 302 ++++++++---------- .../deploy/k8s/KubernetesVolumeSpec.scala | 10 +- .../deploy/k8s/KubernetesVolumeUtils.scala | 53 +-- .../k8s/features/BasicDriverFeatureStep.scala | 24 +- .../features/BasicExecutorFeatureStep.scala | 29 +- .../features/DriverCommandFeatureStep.scala | 22 +- ...iverKubernetesCredentialsFeatureStep.scala | 6 +- .../features/DriverServiceFeatureStep.scala | 10 +- .../k8s/features/EnvSecretsFeatureStep.scala | 11 +- .../HadoopConfExecutorFeatureStep.scala | 14 +- .../HadoopSparkUserExecutorFeatureStep.scala | 17 +- .../KerberosConfDriverFeatureStep.scala | 113 ++++--- .../KerberosConfExecutorFeatureStep.scala | 21 +- .../k8s/features/LocalDirsFeatureStep.scala | 9 +- .../features/MountSecretsFeatureStep.scala | 13 +- .../features/MountVolumesFeatureStep.scala | 11 +- .../features/PodTemplateConfigMapStep.scala | 5 +- .../hadooputils/HadoopKerberosLogin.scala | 64 ---- ...bernetesHadoopDelegationTokenManager.scala | 37 --- .../submit/KubernetesClientApplication.scala | 61 +--- .../k8s/submit/KubernetesDriverBuilder.scala | 53 ++- .../k8s/KubernetesExecutorBuilder.scala | 36 +-- .../deploy/k8s/KubernetesConfSuite.scala | 71 ++-- .../spark/deploy/k8s/KubernetesTestConf.scala | 138 ++++++++ .../k8s/KubernetesVolumeUtilsSuite.scala | 30 +- .../BasicDriverFeatureStepSuite.scala | 127 ++------ .../BasicExecutorFeatureStepSuite.scala | 103 ++---- .../DriverCommandFeatureStepSuite.scala | 29 +- ...ubernetesCredentialsFeatureStepSuite.scala | 69 +--- .../DriverServiceFeatureStepSuite.scala | 193 ++++------- .../features/EnvSecretsFeatureStepSuite.scala | 32 +- .../features/LocalDirsFeatureStepSuite.scala | 46 +-- .../MountSecretsFeatureStepSuite.scala | 21 +- .../MountVolumesFeatureStepSuite.scala | 56 ++-- .../PodTemplateConfigMapStepSuite.scala | 28 +- .../spark/deploy/k8s/submit/ClientSuite.scala | 47 +-- .../submit/KubernetesDriverBuilderSuite.scala | 204 ++---------- .../k8s/ExecutorPodsAllocatorSuite.scala | 43 +-- .../k8s/KubernetesExecutorBuilderSuite.scala | 114 
++----- 40 files changed, 777 insertions(+), 1512 deletions(-) delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/HadoopKerberosLogin.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/security/KubernetesHadoopDelegationTokenManager.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 724acd231a6c..1abf2901268f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -60,7 +60,8 @@ private[spark] object Config extends Logging { .doc("Comma separated list of the Kubernetes secrets used " + "to access private image registries.") .stringConf - .createOptional + .toSequence + .createWithDefault(Nil) val KUBERNETES_AUTH_DRIVER_CONF_PREFIX = "spark.kubernetes.authenticate.driver" @@ -112,16 +113,16 @@ private[spark] object Config extends Logging { .stringConf .createOptional - val KUBERNETES_EXECUTOR_POD_NAME_PREFIX = - ConfigBuilder("spark.kubernetes.executor.podNamePrefix") - .doc("Prefix to use in front of the executor pod names.") + // For testing only. + val KUBERNETES_DRIVER_POD_NAME_PREFIX = + ConfigBuilder("spark.kubernetes.driver.resourceNamePrefix") .internal() .stringConf - .createWithDefault("spark") + .createOptional - val KUBERNETES_PYSPARK_PY_FILES = - ConfigBuilder("spark.kubernetes.python.pyFiles") - .doc("The PyFiles that are distributed via client arguments") + val KUBERNETES_EXECUTOR_POD_NAME_PREFIX = + ConfigBuilder("spark.kubernetes.executor.podNamePrefix") + .doc("Prefix to use in front of the executor pod names.") .internal() .stringConf .createOptional diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala index ebb81540bbbb..a06c21b47f15 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala @@ -16,93 +16,53 @@ */ package org.apache.spark.deploy.k8s -import scala.collection.mutable +import java.util.Locale import io.fabric8.kubernetes.api.model.{LocalObjectReference, LocalObjectReferenceBuilder, Pod} -import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.security.KubernetesHadoopDelegationTokenManager import org.apache.spark.deploy.k8s.submit._ -import org.apache.spark.deploy.k8s.submit.KubernetesClientApplication._ import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.util.Utils - -private[spark] sealed trait KubernetesRoleSpecificConf - -/* - * Structure containing metadata for Kubernetes logic that builds a Spark driver. 
- */ -private[spark] case class KubernetesDriverSpecificConf( - mainAppResource: MainAppResource, - mainClass: String, - appName: String, - appArgs: Seq[String], - pyFiles: Seq[String] = Nil) extends KubernetesRoleSpecificConf { - - require(mainAppResource != null, "Main resource must be provided.") - -} - -/* - * Structure containing metadata for Kubernetes logic that builds a Spark executor. - */ -private[spark] case class KubernetesExecutorSpecificConf( - executorId: String, - driverPod: Option[Pod]) - extends KubernetesRoleSpecificConf - -/* - * Structure containing metadata for HADOOP_CONF_DIR customization - */ -private[spark] case class HadoopConfSpec( - hadoopConfDir: Option[String], - hadoopConfigMapName: Option[String]) - /** * Structure containing metadata for Kubernetes logic to build Spark pods. */ -private[spark] case class KubernetesConf[T <: KubernetesRoleSpecificConf]( - sparkConf: SparkConf, - roleSpecificConf: T, - appResourceNamePrefix: String, - appId: String, - roleLabels: Map[String, String], - roleAnnotations: Map[String, String], - roleSecretNamesToMountPaths: Map[String, String], - roleSecretEnvNamesToKeyRefs: Map[String, String], - roleEnvs: Map[String, String], - roleVolumes: Iterable[KubernetesVolumeSpec[_ <: KubernetesVolumeSpecificConf]], - hadoopConfSpec: Option[HadoopConfSpec]) { +private[spark] abstract class KubernetesConf(val sparkConf: SparkConf) { - def hadoopConfigMapName: String = s"$appResourceNamePrefix-hadoop-config" + val resourceNamePrefix: String + def labels: Map[String, String] + def environment: Map[String, String] + def annotations: Map[String, String] + def secretEnvNamesToKeyRefs: Map[String, String] + def secretNamesToMountPaths: Map[String, String] + def volumes: Seq[KubernetesVolumeSpec] - def krbConfigMapName: String = s"$appResourceNamePrefix-krb5-file" + def appName: String = get("spark.app.name", "spark") - def tokenManager(conf: SparkConf, hConf: Configuration): KubernetesHadoopDelegationTokenManager = - new KubernetesHadoopDelegationTokenManager(conf, hConf) + def hadoopConfigMapName: String = s"$resourceNamePrefix-hadoop-config" - def namespace(): String = sparkConf.get(KUBERNETES_NAMESPACE) + def krbConfigMapName: String = s"$resourceNamePrefix-krb5-file" - def imagePullPolicy(): String = sparkConf.get(CONTAINER_IMAGE_PULL_POLICY) + def namespace: String = get(KUBERNETES_NAMESPACE) - def imagePullSecrets(): Seq[LocalObjectReference] = { + def imagePullPolicy: String = get(CONTAINER_IMAGE_PULL_POLICY) + + def imagePullSecrets: Seq[LocalObjectReference] = { sparkConf .get(IMAGE_PULL_SECRETS) - .map(_.split(",")) - .getOrElse(Array.empty[String]) - .map(_.trim) .map { secret => new LocalObjectReferenceBuilder().withName(secret).build() } } - def nodeSelector(): Map[String, String] = + def nodeSelector: Map[String, String] = KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_NODE_SELECTOR_PREFIX) + def contains(config: ConfigEntry[_]): Boolean = sparkConf.contains(config) + def get[T](config: ConfigEntry[T]): T = sparkConf.get(config) def get(conf: String): String = sparkConf.get(conf) @@ -112,125 +72,139 @@ private[spark] case class KubernetesConf[T <: KubernetesRoleSpecificConf]( def getOption(key: String): Option[String] = sparkConf.getOption(key) } +private[spark] class KubernetesDriverConf( + sparkConf: SparkConf, + val appId: String, + val mainAppResource: MainAppResource, + val mainClass: String, + val appArgs: Array[String], + val pyFiles: Seq[String]) + extends KubernetesConf(sparkConf) { + + override val 
resourceNamePrefix: String = { + val custom = if (Utils.isTesting) get(KUBERNETES_DRIVER_POD_NAME_PREFIX) else None + custom.getOrElse(KubernetesConf.getResourceNamePrefix(appName)) + } + + override def labels: Map[String, String] = { + val presetLabels = Map( + SPARK_APP_ID_LABEL -> appId, + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) + val driverCustomLabels = KubernetesUtils.parsePrefixedKeyValuePairs( + sparkConf, KUBERNETES_DRIVER_LABEL_PREFIX) + + presetLabels.keys.foreach { key => + require( + !driverCustomLabels.contains(key), + s"Label with key $key is not allowed as it is reserved for Spark bookkeeping operations.") + } + + driverCustomLabels ++ presetLabels + } + + override def environment: Map[String, String] = { + KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_DRIVER_ENV_PREFIX) + } + + override def annotations: Map[String, String] = { + KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_DRIVER_ANNOTATION_PREFIX) + } + + override def secretNamesToMountPaths: Map[String, String] = { + KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_DRIVER_SECRETS_PREFIX) + } + + override def secretEnvNamesToKeyRefs: Map[String, String] = { + KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_DRIVER_SECRET_KEY_REF_PREFIX) + } + + override def volumes: Seq[KubernetesVolumeSpec] = { + KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, KUBERNETES_DRIVER_VOLUMES_PREFIX) + } +} + +private[spark] class KubernetesExecutorConf( + sparkConf: SparkConf, + val appId: String, + val executorId: String, + val driverPod: Option[Pod]) + extends KubernetesConf(sparkConf) { + + override val resourceNamePrefix: String = { + get(KUBERNETES_EXECUTOR_POD_NAME_PREFIX).getOrElse( + KubernetesConf.getResourceNamePrefix(appName)) + } + + override def labels: Map[String, String] = { + val presetLabels = Map( + SPARK_EXECUTOR_ID_LABEL -> executorId, + SPARK_APP_ID_LABEL -> appId, + SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) + + val executorCustomLabels = KubernetesUtils.parsePrefixedKeyValuePairs( + sparkConf, KUBERNETES_EXECUTOR_LABEL_PREFIX) + + presetLabels.keys.foreach { key => + require( + !executorCustomLabels.contains(key), + s"Custom executor labels cannot contain $key as it is reserved for Spark.") + } + + executorCustomLabels ++ presetLabels + } + + override def environment: Map[String, String] = sparkConf.getExecutorEnv.toMap + + override def annotations: Map[String, String] = { + KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_EXECUTOR_ANNOTATION_PREFIX) + } + + override def secretNamesToMountPaths: Map[String, String] = { + KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_EXECUTOR_SECRETS_PREFIX) + } + + override def secretEnvNamesToKeyRefs: Map[String, String] = { + KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_EXECUTOR_SECRET_KEY_REF_PREFIX) + } + + override def volumes: Seq[KubernetesVolumeSpec] = { + KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, KUBERNETES_EXECUTOR_VOLUMES_PREFIX) + } + +} + private[spark] object KubernetesConf { def createDriverConf( sparkConf: SparkConf, - appName: String, - appResourceNamePrefix: String, appId: String, mainAppResource: MainAppResource, mainClass: String, appArgs: Array[String], - maybePyFiles: Option[String], - hadoopConfDir: Option[String]): KubernetesConf[KubernetesDriverSpecificConf] = { - val driverCustomLabels = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_DRIVER_LABEL_PREFIX) - 
require(!driverCustomLabels.contains(SPARK_APP_ID_LABEL), "Label with key " + - s"$SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping " + - "operations.") - require(!driverCustomLabels.contains(SPARK_ROLE_LABEL), "Label with key " + - s"$SPARK_ROLE_LABEL is not allowed as it is reserved for Spark bookkeeping " + - "operations.") - val driverLabels = driverCustomLabels ++ Map( - SPARK_APP_ID_LABEL -> appId, - SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) - val driverAnnotations = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_DRIVER_ANNOTATION_PREFIX) - val driverSecretNamesToMountPaths = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_DRIVER_SECRETS_PREFIX) - val driverSecretEnvNamesToKeyRefs = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_DRIVER_SECRET_KEY_REF_PREFIX) - val driverEnvs = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_DRIVER_ENV_PREFIX) - val driverVolumes = KubernetesVolumeUtils.parseVolumesWithPrefix( - sparkConf, KUBERNETES_DRIVER_VOLUMES_PREFIX).map(_.get) - // Also parse executor volumes in order to verify configuration - // before the driver pod is created - KubernetesVolumeUtils.parseVolumesWithPrefix( - sparkConf, KUBERNETES_EXECUTOR_VOLUMES_PREFIX).map(_.get) - - val hadoopConfigMapName = sparkConf.get(KUBERNETES_HADOOP_CONF_CONFIG_MAP) - KubernetesUtils.requireNandDefined( - hadoopConfDir, - hadoopConfigMapName, - "Do not specify both the `HADOOP_CONF_DIR` in your ENV and the ConfigMap " + - "as the creation of an additional ConfigMap, when one is already specified is extraneous" ) - val hadoopConfSpec = - if (hadoopConfDir.isDefined || hadoopConfigMapName.isDefined) { - Some(HadoopConfSpec(hadoopConfDir, hadoopConfigMapName)) - } else { - None - } - val pyFiles = maybePyFiles.map(Utils.stringToSeq).getOrElse(Nil) + maybePyFiles: Option[String]): KubernetesDriverConf = { + // Parse executor volumes in order to verify configuration before the driver pod is created. 
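As a sketch of what that up-front verification now looks like (not part of the patch; it assumes the usual spark.kubernetes.executor.volumes.* key layout, and since these helpers are private[spark] it would only compile inside Spark's own code or tests), a malformed executor volume spec now fails immediately with a plain exception instead of a deferred Failure:

    import org.apache.spark.SparkConf
    import org.apache.spark.deploy.k8s.Config.KUBERNETES_EXECUTOR_VOLUMES_PREFIX
    import org.apache.spark.deploy.k8s.KubernetesVolumeUtils

    // hostPath volume "vol1" declares an options.path but no mount.path (assumed key names).
    val badConf = new SparkConf(false)
      .set("spark.kubernetes.executor.volumes.hostPath.vol1.options.path", "/tmp/data")

    // Before this change: an Iterable[Try[KubernetesVolumeSpec]] whose Failure only surfaced
    // once a call site invoked .get. Now the missing key raises a NoSuchElementException
    // right here, while the driver conf is being built and before any pod is created.
    KubernetesVolumeUtils.parseVolumesWithPrefix(badConf, KUBERNETES_EXECUTOR_VOLUMES_PREFIX)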
+ KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, KUBERNETES_EXECUTOR_VOLUMES_PREFIX) - - KubernetesConf( - sparkConf.clone(), - KubernetesDriverSpecificConf(mainAppResource, mainClass, appName, appArgs, pyFiles), - appResourceNamePrefix, - appId, - driverLabels, - driverAnnotations, - driverSecretNamesToMountPaths, - driverSecretEnvNamesToKeyRefs, - driverEnvs, - driverVolumes, - hadoopConfSpec) + val pyFiles = maybePyFiles.map(Utils.stringToSeq).getOrElse(Nil) + new KubernetesDriverConf(sparkConf.clone(), appId, mainAppResource, mainClass, appArgs, + pyFiles) } def createExecutorConf( sparkConf: SparkConf, executorId: String, appId: String, - driverPod: Option[Pod]): KubernetesConf[KubernetesExecutorSpecificConf] = { - val executorCustomLabels = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_EXECUTOR_LABEL_PREFIX) - require( - !executorCustomLabels.contains(SPARK_APP_ID_LABEL), - s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is reserved for Spark.") - require( - !executorCustomLabels.contains(SPARK_EXECUTOR_ID_LABEL), - s"Custom executor labels cannot contain $SPARK_EXECUTOR_ID_LABEL as it is reserved for" + - " Spark.") - require( - !executorCustomLabels.contains(SPARK_ROLE_LABEL), - s"Custom executor labels cannot contain $SPARK_ROLE_LABEL as it is reserved for Spark.") - val executorLabels = Map( - SPARK_EXECUTOR_ID_LABEL -> executorId, - SPARK_APP_ID_LABEL -> appId, - SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++ - executorCustomLabels - val executorAnnotations = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_EXECUTOR_ANNOTATION_PREFIX) - val executorMountSecrets = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_EXECUTOR_SECRETS_PREFIX) - val executorEnvSecrets = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_EXECUTOR_SECRET_KEY_REF_PREFIX) - val executorEnv = sparkConf.getExecutorEnv.toMap - val executorVolumes = KubernetesVolumeUtils.parseVolumesWithPrefix( - sparkConf, KUBERNETES_EXECUTOR_VOLUMES_PREFIX).map(_.get) - - // If no prefix is defined then we are in pure client mode - // (not the one used by cluster mode inside the container) - val appResourceNamePrefix = { - if (sparkConf.getOption(KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key).isEmpty) { - getResourceNamePrefix(getAppName(sparkConf)) - } else { - sparkConf.get(KUBERNETES_EXECUTOR_POD_NAME_PREFIX) - } - } + driverPod: Option[Pod]): KubernetesExecutorConf = { + new KubernetesExecutorConf(sparkConf.clone(), appId, executorId, driverPod) + } - KubernetesConf( - sparkConf.clone(), - KubernetesExecutorSpecificConf(executorId, driverPod), - appResourceNamePrefix, - appId, - executorLabels, - executorAnnotations, - executorMountSecrets, - executorEnvSecrets, - executorEnv, - executorVolumes, - None) + def getResourceNamePrefix(appName: String): String = { + val launchTime = System.currentTimeMillis() + s"$appName-$launchTime" + .trim + .toLowerCase(Locale.ROOT) + .replaceAll("\\s+", "-") + .replaceAll("\\.", "-") + .replaceAll("[^a-z0-9\\-]", "") + .replaceAll("-+", "-") } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeSpec.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeSpec.scala index 1a214fad9661..0ebe8fd26015 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeSpec.scala +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeSpec.scala @@ -18,12 +18,10 @@ package org.apache.spark.deploy.k8s private[spark] sealed trait KubernetesVolumeSpecificConf -private[spark] case class KubernetesHostPathVolumeConf( - hostPath: String) +private[spark] case class KubernetesHostPathVolumeConf(hostPath: String) extends KubernetesVolumeSpecificConf -private[spark] case class KubernetesPVCVolumeConf( - claimName: String) +private[spark] case class KubernetesPVCVolumeConf(claimName: String) extends KubernetesVolumeSpecificConf private[spark] case class KubernetesEmptyDirVolumeConf( @@ -31,9 +29,9 @@ private[spark] case class KubernetesEmptyDirVolumeConf( sizeLimit: Option[String]) extends KubernetesVolumeSpecificConf -private[spark] case class KubernetesVolumeSpec[T <: KubernetesVolumeSpecificConf]( +private[spark] case class KubernetesVolumeSpec( volumeName: String, mountPath: String, mountSubPath: String, mountReadOnly: Boolean, - volumeConf: T) + volumeConf: KubernetesVolumeSpecificConf) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala index 155326469235..c0c4f86f1a6a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala @@ -16,10 +16,6 @@ */ package org.apache.spark.deploy.k8s -import java.util.NoSuchElementException - -import scala.util.{Failure, Success, Try} - import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ @@ -31,9 +27,7 @@ private[spark] object KubernetesVolumeUtils { * @param prefix the given property name prefix * @return a Map storing with volume name as key and spec as value */ - def parseVolumesWithPrefix( - sparkConf: SparkConf, - prefix: String): Iterable[Try[KubernetesVolumeSpec[_ <: KubernetesVolumeSpecificConf]]] = { + def parseVolumesWithPrefix(sparkConf: SparkConf, prefix: String): Seq[KubernetesVolumeSpec] = { val properties = sparkConf.getAllWithPrefix(prefix).toMap getVolumeTypesAndNames(properties).map { case (volumeType, volumeName) => @@ -41,17 +35,13 @@ private[spark] object KubernetesVolumeUtils { val readOnlyKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_MOUNT_READONLY_KEY" val subPathKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_MOUNT_SUBPATH_KEY" - for { - path <- properties.getTry(pathKey) - volumeConf <- parseVolumeSpecificConf(properties, volumeType, volumeName) - } yield KubernetesVolumeSpec( + KubernetesVolumeSpec( volumeName = volumeName, - mountPath = path, + mountPath = properties(pathKey), mountSubPath = properties.get(subPathKey).getOrElse(""), mountReadOnly = properties.get(readOnlyKey).exists(_.toBoolean), - volumeConf = volumeConf - ) - } + volumeConf = parseVolumeSpecificConf(properties, volumeType, volumeName)) + }.toSeq } /** @@ -61,9 +51,7 @@ private[spark] object KubernetesVolumeUtils { * @param properties flat mapping of property names to values * @return Set[(volumeType, volumeName)] */ - private def getVolumeTypesAndNames( - properties: Map[String, String] - ): Set[(String, String)] = { + private def getVolumeTypesAndNames(properties: Map[String, String]): Set[(String, String)] = { properties.keys.flatMap { k => k.split('.').toList match { case tpe :: name :: _ => Some((tpe, name)) @@ -73,40 +61,25 
@@ private[spark] object KubernetesVolumeUtils { } private def parseVolumeSpecificConf( - options: Map[String, String], - volumeType: String, - volumeName: String): Try[KubernetesVolumeSpecificConf] = { + options: Map[String, String], + volumeType: String, + volumeName: String): KubernetesVolumeSpecificConf = { volumeType match { case KUBERNETES_VOLUMES_HOSTPATH_TYPE => val pathKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_PATH_KEY" - for { - path <- options.getTry(pathKey) - } yield KubernetesHostPathVolumeConf(path) + KubernetesHostPathVolumeConf(options(pathKey)) case KUBERNETES_VOLUMES_PVC_TYPE => val claimNameKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_CLAIM_NAME_KEY" - for { - claimName <- options.getTry(claimNameKey) - } yield KubernetesPVCVolumeConf(claimName) + KubernetesPVCVolumeConf(options(claimNameKey)) case KUBERNETES_VOLUMES_EMPTYDIR_TYPE => val mediumKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_MEDIUM_KEY" val sizeLimitKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_SIZE_LIMIT_KEY" - Success(KubernetesEmptyDirVolumeConf(options.get(mediumKey), options.get(sizeLimitKey))) + KubernetesEmptyDirVolumeConf(options.get(mediumKey), options.get(sizeLimitKey)) case _ => - Failure(new RuntimeException(s"Kubernetes Volume type `$volumeType` is not supported")) - } - } - - /** - * Convenience wrapper to accumulate key lookup errors - */ - implicit private class MapOps[A, B](m: Map[A, B]) { - def getTry(key: A): Try[B] = { - m - .get(key) - .fold[Try[B]](Failure(new NoSuchElementException(key.toString)))(Success(_)) + throw new IllegalArgumentException(s"Kubernetes Volume type `$volumeType` is not supported") } } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index 5ddf73cb16a6..d8cf3653d322 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -30,13 +30,12 @@ import org.apache.spark.internal.config._ import org.apache.spark.ui.SparkUI import org.apache.spark.util.Utils -private[spark] class BasicDriverFeatureStep( - conf: KubernetesConf[KubernetesDriverSpecificConf]) +private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) extends KubernetesFeatureConfigStep { private val driverPodName = conf .get(KUBERNETES_DRIVER_POD_NAME) - .getOrElse(s"${conf.appResourceNamePrefix}-driver") + .getOrElse(s"${conf.resourceNamePrefix}-driver") private val driverContainerImage = conf .get(DRIVER_CONTAINER_IMAGE) @@ -52,8 +51,8 @@ private[spark] class BasicDriverFeatureStep( // The memory overhead factor to use. If the user has not set it, then use a different // value for non-JVM apps. This value is propagated to executors. 
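To make the effect of that factor concrete, a small worked sketch follows; the 0.1 and 0.4 defaults and the 384 MiB floor are assumed from Spark's usual settings and are not shown in this hunk:

    // Rough model of the request computed below: memory + max(factor * memory, minimum).
    def requestedMiB(memoryMiB: Long, factor: Double, minOverheadMiB: Long = 384L): Long =
      memoryMiB + math.max((factor * memoryMiB).toLong, minOverheadMiB)

    requestedMiB(2048L, 0.1)  // JVM driver:    2048 + max(204, 384) = 2432 MiB
    requestedMiB(2048L, 0.4)  // PySpark/R app: 2048 + max(819, 384) = 2867 MiB
    // An explicitly configured overhead factor wins in both cases, which is why the code
    // below only falls back to the non-JVM default when the user has not set one.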
private val overheadFactor = - if (conf.roleSpecificConf.mainAppResource.isInstanceOf[NonJVMResource]) { - if (conf.sparkConf.contains(MEMORY_OVERHEAD_FACTOR)) { + if (conf.mainAppResource.isInstanceOf[NonJVMResource]) { + if (conf.contains(MEMORY_OVERHEAD_FACTOR)) { conf.get(MEMORY_OVERHEAD_FACTOR) } else { NON_JVM_MEMORY_OVERHEAD_FACTOR @@ -68,8 +67,7 @@ private[spark] class BasicDriverFeatureStep( private val driverMemoryWithOverheadMiB = driverMemoryMiB + memoryOverheadMiB override def configurePod(pod: SparkPod): SparkPod = { - val driverCustomEnvs = conf.roleEnvs - .toSeq + val driverCustomEnvs = conf.environment.toSeq .map { env => new EnvVarBuilder() .withName(env._1) @@ -96,7 +94,7 @@ private[spark] class BasicDriverFeatureStep( val driverContainer = new ContainerBuilder(pod.container) .withName(Option(pod.container.getName).getOrElse(DEFAULT_DRIVER_CONTAINER_NAME)) .withImage(driverContainerImage) - .withImagePullPolicy(conf.imagePullPolicy()) + .withImagePullPolicy(conf.imagePullPolicy) .addNewPort() .withName(DRIVER_PORT_NAME) .withContainerPort(driverPort) @@ -130,13 +128,13 @@ private[spark] class BasicDriverFeatureStep( val driverPod = new PodBuilder(pod.pod) .editOrNewMetadata() .withName(driverPodName) - .addToLabels(conf.roleLabels.asJava) - .addToAnnotations(conf.roleAnnotations.asJava) + .addToLabels(conf.labels.asJava) + .addToAnnotations(conf.annotations.asJava) .endMetadata() .editOrNewSpec() .withRestartPolicy("Never") - .addToNodeSelector(conf.nodeSelector().asJava) - .addToImagePullSecrets(conf.imagePullSecrets(): _*) + .addToNodeSelector(conf.nodeSelector.asJava) + .addToImagePullSecrets(conf.imagePullSecrets: _*) .endSpec() .build() @@ -147,7 +145,7 @@ private[spark] class BasicDriverFeatureStep( val additionalProps = mutable.Map( KUBERNETES_DRIVER_POD_NAME.key -> driverPodName, "spark.app.id" -> conf.appId, - KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> conf.appResourceNamePrefix, + KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> conf.resourceNamePrefix, KUBERNETES_DRIVER_SUBMIT_CHECK.key -> "true", MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 7f397e6e84fa..8bf315248388 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -29,8 +29,7 @@ import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils -private[spark] class BasicExecutorFeatureStep( - kubernetesConf: KubernetesConf[KubernetesExecutorSpecificConf]) +private[spark] class BasicExecutorFeatureStep(kubernetesConf: KubernetesExecutorConf) extends KubernetesFeatureConfigStep { // Consider moving some of these fields to KubernetesConf or KubernetesExecutorSpecificConf @@ -42,7 +41,7 @@ private[spark] class BasicExecutorFeatureStep( .sparkConf .getInt("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT) - private val executorPodNamePrefix = kubernetesConf.appResourceNamePrefix + private val executorPodNamePrefix = kubernetesConf.resourceNamePrefix private val driverUrl = RpcEndpointAddress( kubernetesConf.get("spark.driver.host"), @@ -76,7 +75,7 @@ private[spark] 
class BasicExecutorFeatureStep( private val executorLimitCores = kubernetesConf.get(KUBERNETES_EXECUTOR_LIMIT_CORES) override def configurePod(pod: SparkPod): SparkPod = { - val name = s"$executorPodNamePrefix-exec-${kubernetesConf.roleSpecificConf.executorId}" + val name = s"$executorPodNamePrefix-exec-${kubernetesConf.executorId}" // hostname must be no longer than 63 characters, so take the last 63 characters of the pod // name as the hostname. This preserves uniqueness since the end of name contains @@ -98,7 +97,7 @@ private[spark] class BasicExecutorFeatureStep( .get(EXECUTOR_JAVA_OPTIONS) .map { opts => val subsOpts = Utils.substituteAppNExecIds(opts, kubernetesConf.appId, - kubernetesConf.roleSpecificConf.executorId) + kubernetesConf.executorId) val delimitedOpts = Utils.splitCommandString(subsOpts) delimitedOpts.zipWithIndex.map { case (opt, index) => @@ -112,8 +111,8 @@ private[spark] class BasicExecutorFeatureStep( (ENV_APPLICATION_ID, kubernetesConf.appId), // This is to set the SPARK_CONF_DIR to be /opt/spark/conf (ENV_SPARK_CONF_DIR, SPARK_CONF_DIR_INTERNAL), - (ENV_EXECUTOR_ID, kubernetesConf.roleSpecificConf.executorId)) ++ - kubernetesConf.roleEnvs) + (ENV_EXECUTOR_ID, kubernetesConf.executorId)) ++ + kubernetesConf.environment) .map(env => new EnvVarBuilder() .withName(env._1) .withValue(env._2) @@ -138,7 +137,7 @@ private[spark] class BasicExecutorFeatureStep( val executorContainer = new ContainerBuilder(pod.container) .withName(Option(pod.container.getName).getOrElse(DEFAULT_EXECUTOR_CONTAINER_NAME)) .withImage(executorContainerImage) - .withImagePullPolicy(kubernetesConf.imagePullPolicy()) + .withImagePullPolicy(kubernetesConf.imagePullPolicy) .editOrNewResources() .addToRequests("memory", executorMemoryQuantity) .addToLimits("memory", executorMemoryQuantity) @@ -158,27 +157,27 @@ private[spark] class BasicExecutorFeatureStep( .endResources() .build() }.getOrElse(executorContainer) - val driverPod = kubernetesConf.roleSpecificConf.driverPod - val ownerReference = driverPod.map(pod => + val ownerReference = kubernetesConf.driverPod.map { pod => new OwnerReferenceBuilder() .withController(true) .withApiVersion(pod.getApiVersion) .withKind(pod.getKind) .withName(pod.getMetadata.getName) .withUid(pod.getMetadata.getUid) - .build()) + .build() + } val executorPod = new PodBuilder(pod.pod) .editOrNewMetadata() .withName(name) - .addToLabels(kubernetesConf.roleLabels.asJava) - .addToAnnotations(kubernetesConf.roleAnnotations.asJava) + .addToLabels(kubernetesConf.labels.asJava) + .addToAnnotations(kubernetesConf.annotations.asJava) .addToOwnerReferences(ownerReference.toSeq: _*) .endMetadata() .editOrNewSpec() .withHostname(hostname) .withRestartPolicy("Never") - .addToNodeSelector(kubernetesConf.nodeSelector().asJava) - .addToImagePullSecrets(kubernetesConf.imagePullSecrets(): _*) + .addToNodeSelector(kubernetesConf.nodeSelector.asJava) + .addToImagePullSecrets(kubernetesConf.imagePullSecrets: _*) .endSpec() .build() diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala index 8b8f0d01d49f..76b4ec98d494 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala @@ -32,13 +32,11 @@ import 
org.apache.spark.util.Utils * Creates the driver command for running the user app, and propagates needed configuration so * executors can also find the app code. */ -private[spark] class DriverCommandFeatureStep(conf: KubernetesConf[KubernetesDriverSpecificConf]) +private[spark] class DriverCommandFeatureStep(conf: KubernetesDriverConf) extends KubernetesFeatureConfigStep { - private val driverConf = conf.roleSpecificConf - override def configurePod(pod: SparkPod): SparkPod = { - driverConf.mainAppResource match { + conf.mainAppResource match { case JavaMainAppResource(_) => configureForJava(pod) @@ -51,7 +49,7 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesConf[KubernetesDri } override def getAdditionalPodSystemProperties(): Map[String, String] = { - driverConf.mainAppResource match { + conf.mainAppResource match { case JavaMainAppResource(res) => res.map(additionalJavaProperties).getOrElse(Map.empty) @@ -71,10 +69,10 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesConf[KubernetesDri } private def configureForPython(pod: SparkPod, res: String): SparkPod = { - val maybePythonFiles = if (driverConf.pyFiles.nonEmpty) { + val maybePythonFiles = if (conf.pyFiles.nonEmpty) { // Delineation by ":" is to append the PySpark Files to the PYTHONPATH // of the respective PySpark pod - val resolved = KubernetesUtils.resolveFileUrisAndPath(driverConf.pyFiles) + val resolved = KubernetesUtils.resolveFileUrisAndPath(conf.pyFiles) Some(new EnvVarBuilder() .withName(ENV_PYSPARK_FILES) .withValue(resolved.mkString(":")) @@ -85,7 +83,7 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesConf[KubernetesDri val pythonEnvs = Seq(new EnvVarBuilder() .withName(ENV_PYSPARK_MAJOR_PYTHON_VERSION) - .withValue(conf.sparkConf.get(PYSPARK_MAJOR_PYTHON_VERSION)) + .withValue(conf.get(PYSPARK_MAJOR_PYTHON_VERSION)) .build()) ++ maybePythonFiles @@ -105,9 +103,9 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesConf[KubernetesDri new ContainerBuilder(pod.container) .addToArgs("driver") .addToArgs("--properties-file", SPARK_CONF_PATH) - .addToArgs("--class", driverConf.mainClass) + .addToArgs("--class", conf.mainClass) .addToArgs(resource) - .addToArgs(driverConf.appArgs: _*) + .addToArgs(conf.appArgs: _*) } private def additionalJavaProperties(resource: String): Map[String, String] = { @@ -116,7 +114,7 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesConf[KubernetesDri private def additionalPythonProperties(resource: String): Map[String, String] = { resourceType(APP_RESOURCE_TYPE_PYTHON) ++ - mergeFileList("spark.files", Seq(resource) ++ driverConf.pyFiles) + mergeFileList("spark.files", Seq(resource) ++ conf.pyFiles) } private def additionalRProperties(resource: String): Map[String, String] = { @@ -124,7 +122,7 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesConf[KubernetesDri } private def mergeFileList(key: String, filesToAdd: Seq[String]): Map[String, String] = { - val existing = Utils.stringToSeq(conf.sparkConf.get(key, "")) + val existing = Utils.stringToSeq(conf.get(key, "")) Map(key -> (existing ++ filesToAdd).distinct.mkString(",")) } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala index ff5ad6673b30..795ca49a3c87 100644 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala @@ -28,7 +28,7 @@ import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -private[spark] class DriverKubernetesCredentialsFeatureStep(kubernetesConf: KubernetesConf[_]) +private[spark] class DriverKubernetesCredentialsFeatureStep(kubernetesConf: KubernetesConf) extends KubernetesFeatureConfigStep { // TODO clean up this class, and credentials in general. See also SparkKubernetesClientFactory. // We should use a struct to hold all creds-related fields. A lot of the code is very repetitive. @@ -66,7 +66,7 @@ private[spark] class DriverKubernetesCredentialsFeatureStep(kubernetesConf: Kube clientCertDataBase64.isDefined private val driverCredentialsSecretName = - s"${kubernetesConf.appResourceNamePrefix}-kubernetes-credentials" + s"${kubernetesConf.resourceNamePrefix}-kubernetes-credentials" override def configurePod(pod: SparkPod): SparkPod = { if (!shouldMountSecret) { @@ -122,7 +122,7 @@ private[spark] class DriverKubernetesCredentialsFeatureStep(kubernetesConf: Kube val redactedTokens = kubernetesConf.sparkConf.getAll .filter(_._1.endsWith(OAUTH_TOKEN_CONF_SUFFIX)) .toMap - .mapValues( _ => "") + .map { case (k, v) => (k, "") } redactedTokens ++ resolvedMountedCaCertFile.map { file => Map( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala index f2d7bbd08f30..42305457f4ff 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala @@ -20,13 +20,13 @@ import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model.{HasMetadata, ServiceBuilder} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesDriverConf, SparkPod} import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.Logging import org.apache.spark.util.{Clock, SystemClock} private[spark] class DriverServiceFeatureStep( - kubernetesConf: KubernetesConf[KubernetesDriverSpecificConf], + kubernetesConf: KubernetesDriverConf, clock: Clock = new SystemClock) extends KubernetesFeatureConfigStep with Logging { import DriverServiceFeatureStep._ @@ -38,7 +38,7 @@ private[spark] class DriverServiceFeatureStep( s"$DRIVER_HOST_KEY is not supported in Kubernetes mode, as the driver's hostname will be " + "managed via a Kubernetes service.") - private val preferredServiceName = s"${kubernetesConf.appResourceNamePrefix}$DRIVER_SVC_POSTFIX" + private val preferredServiceName = s"${kubernetesConf.resourceNamePrefix}$DRIVER_SVC_POSTFIX" private val resolvedServiceName = if (preferredServiceName.length <= MAX_SERVICE_NAME_LENGTH) { preferredServiceName } else { @@ -58,7 +58,7 @@ private[spark] class DriverServiceFeatureStep( override def configurePod(pod: SparkPod): SparkPod = pod override def getAdditionalPodSystemProperties(): Map[String, String] = { - val driverHostname = 
s"$resolvedServiceName.${kubernetesConf.namespace()}.svc" + val driverHostname = s"$resolvedServiceName.${kubernetesConf.namespace}.svc" Map(DRIVER_HOST_KEY -> driverHostname, "spark.driver.port" -> driverPort.toString, org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT.key -> @@ -72,7 +72,7 @@ private[spark] class DriverServiceFeatureStep( .endMetadata() .withNewSpec() .withClusterIP("None") - .withSelector(kubernetesConf.roleLabels.asJava) + .withSelector(kubernetesConf.labels.asJava) .addNewPort() .withName(DRIVER_PORT_NAME) .withPort(driverPort) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala index 03ff7d48420f..d78f04dcc40e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala @@ -20,14 +20,13 @@ import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, HasMetadata} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesRoleSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} -private[spark] class EnvSecretsFeatureStep( - kubernetesConf: KubernetesConf[_ <: KubernetesRoleSpecificConf]) +private[spark] class EnvSecretsFeatureStep(kubernetesConf: KubernetesConf) extends KubernetesFeatureConfigStep { override def configurePod(pod: SparkPod): SparkPod = { val addedEnvSecrets = kubernetesConf - .roleSecretEnvNamesToKeyRefs + .secretEnvNamesToKeyRefs .map{ case (envName, keyRef) => // Keyref parts val keyRefParts = keyRef.split(":") @@ -50,8 +49,4 @@ private[spark] class EnvSecretsFeatureStep( .build() SparkPod(pod.pod, containerWithEnvVars) } - - override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty - - override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala index fd09de2a918a..bca66759d586 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala @@ -16,9 +16,7 @@ */ package org.apache.spark.deploy.k8s.features -import io.fabric8.kubernetes.api.model.HasMetadata - -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, SparkPod} import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.features.hadooputils.HadoopBootstrapUtil import org.apache.spark.internal.Logging @@ -28,21 +26,15 @@ import org.apache.spark.internal.Logging * containing Hadoop config files mounted as volumes and an ENV variable * pointed to the mounted file directory. 
*/ -private[spark] class HadoopConfExecutorFeatureStep( - kubernetesConf: KubernetesConf[KubernetesExecutorSpecificConf]) +private[spark] class HadoopConfExecutorFeatureStep(conf: KubernetesExecutorConf) extends KubernetesFeatureConfigStep with Logging { override def configurePod(pod: SparkPod): SparkPod = { - val sparkConf = kubernetesConf.sparkConf - val hadoopConfDirCMapName = sparkConf.getOption(HADOOP_CONFIG_MAP_NAME) + val hadoopConfDirCMapName = conf.getOption(HADOOP_CONFIG_MAP_NAME) require(hadoopConfDirCMapName.isDefined, "Ensure that the env `HADOOP_CONF_DIR` is defined either in the client or " + " using pre-existing ConfigMaps") logInfo("HADOOP_CONF_DIR defined") HadoopBootstrapUtil.bootstrapHadoopConfDir(None, None, hadoopConfDirCMapName, pod) } - - override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty - - override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala index 5b6a6d5a7db4..e34211076319 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala @@ -16,28 +16,19 @@ */ package org.apache.spark.deploy.k8s.features -import io.fabric8.kubernetes.api.model.HasMetadata - -import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, SparkPod} import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.KubernetesExecutorSpecificConf import org.apache.spark.deploy.k8s.features.hadooputils.HadoopBootstrapUtil -import org.apache.spark.internal.Logging /** * This step is responsible for setting ENV_SPARK_USER when HADOOP_FILES are detected * however, this step would not be run if Kerberos is enabled, as Kerberos sets SPARK_USER */ -private[spark] class HadoopSparkUserExecutorFeatureStep( - kubernetesConf: KubernetesConf[KubernetesExecutorSpecificConf]) - extends KubernetesFeatureConfigStep with Logging { +private[spark] class HadoopSparkUserExecutorFeatureStep(conf: KubernetesExecutorConf) + extends KubernetesFeatureConfigStep { override def configurePod(pod: SparkPod): SparkPod = { - val sparkUserName = kubernetesConf.sparkConf.get(KERBEROS_SPARK_USER_NAME) + val sparkUserName = conf.get(KERBEROS_SPARK_USER_NAME) HadoopBootstrapUtil.bootstrapSparkUserPod(sparkUserName, pod) } - - override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty - - override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala index ce47933b7f70..c6d5a866fa7b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala @@ -16,40 +16,43 @@ */ package org.apache.spark.deploy.k8s.features -import 
io.fabric8.kubernetes.api.model.HasMetadata +import io.fabric8.kubernetes.api.model.{HasMetadata, Secret, SecretBuilder} +import org.apache.commons.codec.binary.Base64 +import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesUtils, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesDriverConf, KubernetesUtils, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.KubernetesDriverSpecificConf import org.apache.spark.deploy.k8s.features.hadooputils._ -import org.apache.spark.internal.Logging +import org.apache.spark.deploy.security.HadoopDelegationTokenManager /** * Runs the necessary Hadoop-based logic based on Kerberos configs and the presence of the * HADOOP_CONF_DIR. This runs various bootstrap methods defined in HadoopBootstrapUtil. */ -private[spark] class KerberosConfDriverFeatureStep( - kubernetesConf: KubernetesConf[KubernetesDriverSpecificConf]) - extends KubernetesFeatureConfigStep with Logging { - - require(kubernetesConf.hadoopConfSpec.isDefined, - "Ensure that HADOOP_CONF_DIR is defined either via env or a pre-defined ConfigMap") - private val hadoopConfDirSpec = kubernetesConf.hadoopConfSpec.get - private val conf = kubernetesConf.sparkConf - private val principal = conf.get(org.apache.spark.internal.config.PRINCIPAL) - private val keytab = conf.get(org.apache.spark.internal.config.KEYTAB) - private val existingSecretName = conf.get(KUBERNETES_KERBEROS_DT_SECRET_NAME) - private val existingSecretItemKey = conf.get(KUBERNETES_KERBEROS_DT_SECRET_ITEM_KEY) - private val krb5File = conf.get(KUBERNETES_KERBEROS_KRB5_FILE) - private val krb5CMap = conf.get(KUBERNETES_KERBEROS_KRB5_CONFIG_MAP) - private val kubeTokenManager = kubernetesConf.tokenManager(conf, - SparkHadoopUtil.get.newConfiguration(conf)) +private[spark] class KerberosConfDriverFeatureStep(kubernetesConf: KubernetesDriverConf) + extends KubernetesFeatureConfigStep { + + private val hadoopConfDir = Option(kubernetesConf.sparkConf.getenv(ENV_HADOOP_CONF_DIR)) + private val hadoopConfigMapName = kubernetesConf.get(KUBERNETES_HADOOP_CONF_CONFIG_MAP) + KubernetesUtils.requireNandDefined( + hadoopConfDir, + hadoopConfigMapName, + "Do not specify both the `HADOOP_CONF_DIR` in your ENV and the ConfigMap " + + "as the creation of an additional ConfigMap, when one is already specified is extraneous") + + private val principal = kubernetesConf.get(org.apache.spark.internal.config.PRINCIPAL) + private val keytab = kubernetesConf.get(org.apache.spark.internal.config.KEYTAB) + private val existingSecretName = kubernetesConf.get(KUBERNETES_KERBEROS_DT_SECRET_NAME) + private val existingSecretItemKey = kubernetesConf.get(KUBERNETES_KERBEROS_DT_SECRET_ITEM_KEY) + private val krb5File = kubernetesConf.get(KUBERNETES_KERBEROS_KRB5_FILE) + private val krb5CMap = kubernetesConf.get(KUBERNETES_KERBEROS_KRB5_CONFIG_MAP) + private val hadoopConf = SparkHadoopUtil.get.newConfiguration(kubernetesConf.sparkConf) + private val tokenManager = new HadoopDelegationTokenManager(kubernetesConf.sparkConf, hadoopConf) private val isKerberosEnabled = - (hadoopConfDirSpec.hadoopConfDir.isDefined && kubeTokenManager.isSecurityEnabled) || - (hadoopConfDirSpec.hadoopConfigMapName.isDefined && - (krb5File.isDefined || krb5CMap.isDefined)) + (hadoopConfDir.isDefined && UserGroupInformation.isSecurityEnabled) || + (hadoopConfigMapName.isDefined && 
(krb5File.isDefined || krb5CMap.isDefined)) require(keytab.isEmpty || isKerberosEnabled, "You must enable Kerberos support if you are specifying a Kerberos Keytab") @@ -76,11 +79,11 @@ private[spark] class KerberosConfDriverFeatureStep( "If a secret storing a Kerberos Delegation Token is specified you must also" + " specify the item-key where the data is stored") - private val hadoopConfigurationFiles = hadoopConfDirSpec.hadoopConfDir.map { hConfDir => + private val hadoopConfigurationFiles = hadoopConfDir.map { hConfDir => HadoopBootstrapUtil.getHadoopConfFiles(hConfDir) } private val newHadoopConfigMapName = - if (hadoopConfDirSpec.hadoopConfigMapName.isEmpty) { + if (hadoopConfigMapName.isEmpty) { Some(kubernetesConf.hadoopConfigMapName) } else { None @@ -95,23 +98,24 @@ private[spark] class KerberosConfDriverFeatureStep( dtSecret = None, dtSecretName = secretName, dtSecretItemKey = secretItemKey, - jobUserName = kubeTokenManager.getCurrentUser.getShortUserName) + jobUserName = UserGroupInformation.getCurrentUser.getShortUserName) }).orElse( if (isKerberosEnabled) { - Some(HadoopKerberosLogin.buildSpec( - conf, - kubernetesConf.appResourceNamePrefix, - kubeTokenManager)) + Some(buildKerberosSpec()) } else { None } ) override def configurePod(pod: SparkPod): SparkPod = { + if (!isKerberosEnabled) { + return pod + } + val hadoopBasedSparkPod = HadoopBootstrapUtil.bootstrapHadoopConfDir( - hadoopConfDirSpec.hadoopConfDir, + hadoopConfDir, newHadoopConfigMapName, - hadoopConfDirSpec.hadoopConfigMapName, + hadoopConfigMapName, pod) kerberosConfSpec.map { hSpec => HadoopBootstrapUtil.bootstrapKerberosPod( @@ -124,11 +128,15 @@ private[spark] class KerberosConfDriverFeatureStep( hadoopBasedSparkPod) }.getOrElse( HadoopBootstrapUtil.bootstrapSparkUserPod( - kubeTokenManager.getCurrentUser.getShortUserName, + UserGroupInformation.getCurrentUser.getShortUserName, hadoopBasedSparkPod)) } override def getAdditionalPodSystemProperties(): Map[String, String] = { + if (!isKerberosEnabled) { + return Map.empty + } + val resolvedConfValues = kerberosConfSpec.map { hSpec => Map(KERBEROS_DT_SECRET_NAME -> hSpec.dtSecretName, KERBEROS_DT_SECRET_KEY -> hSpec.dtSecretItemKey, @@ -136,13 +144,16 @@ private[spark] class KerberosConfDriverFeatureStep( KRB5_CONFIG_MAP_NAME -> krb5CMap.getOrElse(kubernetesConf.krbConfigMapName)) }.getOrElse( Map(KERBEROS_SPARK_USER_NAME -> - kubeTokenManager.getCurrentUser.getShortUserName)) + UserGroupInformation.getCurrentUser.getShortUserName)) Map(HADOOP_CONFIG_MAP_NAME -> - hadoopConfDirSpec.hadoopConfigMapName.getOrElse( - kubernetesConf.hadoopConfigMapName)) ++ resolvedConfValues + hadoopConfigMapName.getOrElse(kubernetesConf.hadoopConfigMapName)) ++ resolvedConfValues } override def getAdditionalKubernetesResources(): Seq[HasMetadata] = { + if (!isKerberosEnabled) { + return Seq.empty + } + val hadoopConfConfigMap = for { hName <- newHadoopConfigMapName hFiles <- hadoopConfigurationFiles @@ -162,4 +173,34 @@ private[spark] class KerberosConfDriverFeatureStep( krb5ConfigMap.toSeq ++ kerberosDTSecret.toSeq } + + private def buildKerberosSpec(): KerberosConfigSpec = { + // The JobUserUGI will be taken fom the Local Ticket Cache or via keytab+principal + // The login happens in the SparkSubmit so login logic is not necessary to include + val jobUserUGI = UserGroupInformation.getCurrentUser + val creds = jobUserUGI.getCredentials + tokenManager.obtainDelegationTokens(creds) + val tokenData = SparkHadoopUtil.get.serialize(creds) + require(tokenData.nonEmpty, "Did not obtain any 
delegation tokens") + val newSecretName = + s"${kubernetesConf.resourceNamePrefix}-$KERBEROS_DELEGEGATION_TOKEN_SECRET_NAME" + val secretDT = + new SecretBuilder() + .withNewMetadata() + .withName(newSecretName) + .endMetadata() + .addToData(KERBEROS_SECRET_KEY, Base64.encodeBase64String(tokenData)) + .build() + KerberosConfigSpec( + dtSecret = Some(secretDT), + dtSecretName = newSecretName, + dtSecretItemKey = KERBEROS_SECRET_KEY, + jobUserName = jobUserUGI.getShortUserName) + } + + private case class KerberosConfigSpec( + dtSecret: Option[Secret], + dtSecretName: String, + dtSecretItemKey: String, + jobUserName: String) } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala index 06a88b6c229f..32bb6a5d2bcb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala @@ -16,38 +16,29 @@ */ package org.apache.spark.deploy.k8s.features -import io.fabric8.kubernetes.api.model.HasMetadata - -import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, SparkPod} import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.KubernetesExecutorSpecificConf import org.apache.spark.deploy.k8s.features.hadooputils.HadoopBootstrapUtil import org.apache.spark.internal.Logging /** * This step is responsible for mounting the DT secret for the executors */ -private[spark] class KerberosConfExecutorFeatureStep( - kubernetesConf: KubernetesConf[KubernetesExecutorSpecificConf]) +private[spark] class KerberosConfExecutorFeatureStep(conf: KubernetesExecutorConf) extends KubernetesFeatureConfigStep with Logging { - private val sparkConf = kubernetesConf.sparkConf - private val maybeKrb5CMap = sparkConf.getOption(KRB5_CONFIG_MAP_NAME) + private val maybeKrb5CMap = conf.getOption(KRB5_CONFIG_MAP_NAME) require(maybeKrb5CMap.isDefined, "HADOOP_CONF_DIR ConfigMap not found") override def configurePod(pod: SparkPod): SparkPod = { logInfo(s"Mounting Resources for Kerberos") HadoopBootstrapUtil.bootstrapKerberosPod( - sparkConf.get(KERBEROS_DT_SECRET_NAME), - sparkConf.get(KERBEROS_DT_SECRET_KEY), - sparkConf.get(KERBEROS_SPARK_USER_NAME), + conf.get(KERBEROS_DT_SECRET_NAME), + conf.get(KERBEROS_DT_SECRET_KEY), + conf.get(KERBEROS_SPARK_USER_NAME), None, None, maybeKrb5CMap, pod) } - - override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty - - override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty[HasMetadata] } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/LocalDirsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/LocalDirsFeatureStep.scala index be386e119d46..19ed2df5551d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/LocalDirsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/LocalDirsFeatureStep.scala @@ -16,16 +16,15 @@ */ package org.apache.spark.deploy.k8s.features -import java.nio.file.Paths import java.util.UUID import 
io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, VolumeBuilder, VolumeMountBuilder} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, KubernetesRoleSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ private[spark] class LocalDirsFeatureStep( - conf: KubernetesConf[_ <: KubernetesRoleSpecificConf], + conf: KubernetesConf, defaultLocalDir: String = s"/var/data/spark-${UUID.randomUUID}") extends KubernetesFeatureConfigStep { @@ -73,8 +72,4 @@ private[spark] class LocalDirsFeatureStep( .build() SparkPod(podWithLocalDirVolumes, containerWithLocalDirVolumeMounts) } - - override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty - - override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala index 97fa9499b2ed..f4e1a3a32672 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala @@ -18,14 +18,13 @@ package org.apache.spark.deploy.k8s.features import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, VolumeBuilder, VolumeMountBuilder} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesRoleSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} -private[spark] class MountSecretsFeatureStep( - kubernetesConf: KubernetesConf[_ <: KubernetesRoleSpecificConf]) +private[spark] class MountSecretsFeatureStep(kubernetesConf: KubernetesConf) extends KubernetesFeatureConfigStep { override def configurePod(pod: SparkPod): SparkPod = { val addedVolumes = kubernetesConf - .roleSecretNamesToMountPaths + .secretNamesToMountPaths .keys .map(secretName => new VolumeBuilder() @@ -40,7 +39,7 @@ private[spark] class MountSecretsFeatureStep( .endSpec() .build() val addedVolumeMounts = kubernetesConf - .roleSecretNamesToMountPaths + .secretNamesToMountPaths .map { case (secretName, mountPath) => new VolumeMountBuilder() @@ -54,9 +53,5 @@ private[spark] class MountSecretsFeatureStep( SparkPod(podWithVolumes, containerWithMounts) } - override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty - - override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty - private def secretVolumeName(secretName: String): String = s"$secretName-volume" } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala index 1473a7d3ee7f..8548e7057cdf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala @@ -20,12 +20,11 @@ import io.fabric8.kubernetes.api.model._ import org.apache.spark.deploy.k8s._ -private[spark] class MountVolumesFeatureStep( - kubernetesConf: KubernetesConf[_ <: KubernetesRoleSpecificConf]) +private[spark] class 
MountVolumesFeatureStep(conf: KubernetesConf) extends KubernetesFeatureConfigStep { override def configurePod(pod: SparkPod): SparkPod = { - val (volumeMounts, volumes) = constructVolumes(kubernetesConf.roleVolumes).unzip + val (volumeMounts, volumes) = constructVolumes(conf.volumes).unzip val podWithVolumes = new PodBuilder(pod.pod) .editSpec() @@ -40,12 +39,8 @@ private[spark] class MountVolumesFeatureStep( SparkPod(podWithVolumes, containerWithVolumeMounts) } - override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty - - override def getAdditionalKubernetesResources(): Seq[HasMetadata] = Seq.empty - private def constructVolumes( - volumeSpecs: Iterable[KubernetesVolumeSpec[_ <: KubernetesVolumeSpecificConf]] + volumeSpecs: Iterable[KubernetesVolumeSpec] ): Iterable[(VolumeMount, Volume)] = { volumeSpecs.map { spec => val volumeMount = new VolumeMountBuilder() diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala index 28e2d1726ae2..09dcf93a54f8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala @@ -22,12 +22,11 @@ import java.nio.charset.StandardCharsets import com.google.common.io.Files import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, ContainerBuilder, HasMetadata, PodBuilder} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesRoleSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -private[spark] class PodTemplateConfigMapStep( - conf: KubernetesConf[_ <: KubernetesRoleSpecificConf]) +private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf) extends KubernetesFeatureConfigStep { def configurePod(pod: SparkPod): SparkPod = { val podWithVolume = new PodBuilder(pod.pod) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/HadoopKerberosLogin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/HadoopKerberosLogin.scala deleted file mode 100644 index 0022d8f242a7..000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/HadoopKerberosLogin.scala +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.k8s.features.hadooputils - -import io.fabric8.kubernetes.api.model.SecretBuilder -import org.apache.commons.codec.binary.Base64 - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.security.KubernetesHadoopDelegationTokenManager - -/** - * This logic does all the heavy lifting for Delegation Token creation. This step - * assumes that the job user has either specified a principal and keytab or ran - * $kinit before running spark-submit. By running UGI.getCurrentUser we are able - * to obtain the current user, either signed in via $kinit or keytab. With the - * Job User principal you then retrieve the delegation token from the NameNode - * and store values in DelegationToken. Lastly, the class puts the data into - * a secret. All this is defined in a KerberosConfigSpec. - */ -private[spark] object HadoopKerberosLogin { - def buildSpec( - submissionSparkConf: SparkConf, - kubernetesResourceNamePrefix: String, - tokenManager: KubernetesHadoopDelegationTokenManager): KerberosConfigSpec = { - // The JobUserUGI will be taken fom the Local Ticket Cache or via keytab+principal - // The login happens in the SparkSubmit so login logic is not necessary to include - val jobUserUGI = tokenManager.getCurrentUser - val originalCredentials = jobUserUGI.getCredentials - tokenManager.obtainDelegationTokens(originalCredentials) - - val tokenData = SparkHadoopUtil.get.serialize(originalCredentials) - - val initialTokenDataKeyName = KERBEROS_SECRET_KEY - val newSecretName = s"$kubernetesResourceNamePrefix-$KERBEROS_DELEGEGATION_TOKEN_SECRET_NAME" - val secretDT = - new SecretBuilder() - .withNewMetadata() - .withName(newSecretName) - .endMetadata() - .addToData(initialTokenDataKeyName, Base64.encodeBase64String(tokenData)) - .build() - KerberosConfigSpec( - dtSecret = Some(secretDT), - dtSecretName = newSecretName, - dtSecretItemKey = initialTokenDataKeyName, - jobUserName = jobUserUGI.getShortUserName) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/security/KubernetesHadoopDelegationTokenManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/security/KubernetesHadoopDelegationTokenManager.scala deleted file mode 100644 index 3e98d5811d83..000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/security/KubernetesHadoopDelegationTokenManager.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
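The scaladoc on the removed HadoopKerberosLogin above already describes the delegation-token flow it implemented; it is condensed here as a plain sketch for readers skimming the deletion. The secret name and data key below are placeholders standing in for the KERBEROS_DELEGEGATION_TOKEN_SECRET_NAME and KERBEROS_SECRET_KEY constants the removed code used, and the tokenManager call is left as a comment because that collaborator is removed by this patch as well.

    import io.fabric8.kubernetes.api.model.SecretBuilder
    import org.apache.commons.codec.binary.Base64
    import org.apache.hadoop.security.UserGroupInformation
    import org.apache.spark.deploy.SparkHadoopUtil

    // 1) current user (kinit ticket cache or keytab), 2) gather delegation tokens,
    // 3) serialize the credentials, 4) publish them base64-encoded in a Secret.
    val jobUserUGI = UserGroupInformation.getCurrentUser
    val credentials = jobUserUGI.getCredentials
    // tokenManager.obtainDelegationTokens(credentials)   // as in the removed buildSpec
    val tokenData = SparkHadoopUtil.get.serialize(credentials)
    val dtSecret = new SecretBuilder()
      .withNewMetadata()
        .withName("my-prefix-delegation-tokens")          // placeholder secret name
        .endMetadata()
      .addToData("hadoop-tokens", Base64.encodeBase64String(tokenData))  // placeholder key
      .build()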
- */ - -package org.apache.spark.deploy.k8s.security - -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.security.UserGroupInformation - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.security.HadoopDelegationTokenManager - -/** - * Adds Kubernetes-specific functionality to HadoopDelegationTokenManager. - */ -private[spark] class KubernetesHadoopDelegationTokenManager( - _sparkConf: SparkConf, - _hadoopConf: Configuration) - extends HadoopDelegationTokenManager(_sparkConf, _hadoopConf) { - - def getCurrentUser: UserGroupInformation = UserGroupInformation.getCurrentUser - def isSecurityEnabled: Boolean = UserGroupInformation.isSecurityEnabled - -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala index 543d6b16d6ae..70a93c968795 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala @@ -17,19 +17,17 @@ package org.apache.spark.deploy.k8s.submit import java.io.StringWriter -import java.util.{Collections, Locale, Properties, UUID} import java.util.{Collections, UUID} import java.util.Properties import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.KubernetesClient -import org.apache.hadoop.security.UserGroupInformation import scala.collection.mutable import scala.util.control.NonFatal import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkApplication -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, KubernetesUtils, SparkKubernetesClientFactory} +import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.Logging @@ -47,8 +45,7 @@ private[spark] case class ClientArguments( mainAppResource: MainAppResource, mainClass: String, driverArgs: Array[String], - maybePyFiles: Option[String], - hadoopConfigDir: Option[String]) + maybePyFiles: Option[String]) private[spark] object ClientArguments { @@ -82,8 +79,7 @@ private[spark] object ClientArguments { mainAppResource, mainClass.get, driverArgs.toArray, - maybePyFiles, - sys.env.get(ENV_HADOOP_CONF_DIR)) + maybePyFiles) } } @@ -92,27 +88,24 @@ private[spark] object ClientArguments { * watcher that monitors and logs the application status. Waits for the application to terminate if * spark.kubernetes.submission.waitAppCompletion is true. * + * @param conf The kubernetes driver config. * @param builder Responsible for building the base driver pod based on a composition of * implemented features. 
- * @param kubernetesConf application configuration * @param kubernetesClient the client to talk to the Kubernetes API server * @param waitForAppCompletion a flag indicating whether the client should wait for the application * to complete - * @param appName the application name * @param watcher a watcher that monitors and logs the application status */ private[spark] class Client( + conf: KubernetesDriverConf, builder: KubernetesDriverBuilder, - kubernetesConf: KubernetesConf[KubernetesDriverSpecificConf], kubernetesClient: KubernetesClient, waitForAppCompletion: Boolean, - appName: String, - watcher: LoggingPodStatusWatcher, - kubernetesResourceNamePrefix: String) extends Logging { + watcher: LoggingPodStatusWatcher) extends Logging { def run(): Unit = { - val resolvedDriverSpec = builder.buildFromFeatures(kubernetesConf) - val configMapName = s"$kubernetesResourceNamePrefix-driver-conf-map" + val resolvedDriverSpec = builder.buildFromFeatures(conf) + val configMapName = s"${conf.resourceNamePrefix}-driver-conf-map" val configMap = buildConfigMap(configMapName, resolvedDriverSpec.systemProperties) // The include of the ENV_VAR for "SPARK_CONF_DIR" is to allow for the // Spark command builder to pickup on the Java Options present in the ConfigMap @@ -155,11 +148,11 @@ private[spark] class Client( } if (waitForAppCompletion) { - logInfo(s"Waiting for application $appName to finish...") + logInfo(s"Waiting for application ${conf.appName} to finish...") watcher.awaitCompletion() - logInfo(s"Application $appName finished.") + logInfo(s"Application ${conf.appName} finished.") } else { - logInfo(s"Deployed Spark application $appName into Kubernetes.") + logInfo(s"Deployed Spark application ${conf.appName} into Kubernetes.") } } } @@ -216,19 +209,13 @@ private[spark] class KubernetesClientApplication extends SparkApplication { // a unique app ID (captured by spark.app.id) in the format below. val kubernetesAppId = s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}" val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) - val kubernetesResourceNamePrefix = KubernetesClientApplication.getResourceNamePrefix(appName) - sparkConf.set(KUBERNETES_PYSPARK_PY_FILES, clientArguments.maybePyFiles.getOrElse("")) val kubernetesConf = KubernetesConf.createDriverConf( sparkConf, - appName, - kubernetesResourceNamePrefix, kubernetesAppId, clientArguments.mainAppResource, clientArguments.mainClass, clientArguments.driverArgs, - clientArguments.maybePyFiles, - clientArguments.hadoopConfigDir) - val namespace = kubernetesConf.namespace() + clientArguments.maybePyFiles) // The master URL has been checked for validity already in SparkSubmit. // We just need to get rid of the "k8s://" prefix here. 
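The Client above now takes a single KubernetesDriverConf and reads appName and resourceNamePrefix from it instead of receiving them as separate constructor arguments. That conf class is not shown in this part of the patch; the sketch below is the shape those usages imply, with a constructor mirroring the trimmed createDriverConf argument list and the prefix normalization copied from the getResourceNamePrefix helper this patch removes a little further below. The real class presumably also extends the shared KubernetesConf that exposes labels, secretNamesToMountPaths and the other accessors used by the feature steps.

    import java.util.Locale

    import org.apache.spark.SparkConf
    import org.apache.spark.deploy.k8s.submit.MainAppResource

    // Sketch only: field names follow the createDriverConf call sites in this patch.
    private[spark] class KubernetesDriverConf(
        val sparkConf: SparkConf,
        val appId: String,
        val mainAppResource: MainAppResource,
        val mainClass: String,
        val appArgs: Array[String],
        val pyFiles: Seq[String]) {

      def appName: String = sparkConf.getOption("spark.app.name").getOrElse("spark")

      // Same normalization the removed KubernetesClientApplication helper performed.
      lazy val resourceNamePrefix: String = {
        val launchTime = System.currentTimeMillis()
        s"$appName-$launchTime"
          .trim
          .toLowerCase(Locale.ROOT)
          .replaceAll("\\s+", "-")
          .replaceAll("\\.", "-")
          .replaceAll("[^a-z0-9\\-]", "")
          .replaceAll("-+", "-")
      }
    }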
val master = KubernetesUtils.parseMasterUrl(sparkConf.get("spark.master")) @@ -238,36 +225,18 @@ private[spark] class KubernetesClientApplication extends SparkApplication { Utils.tryWithResource(SparkKubernetesClientFactory.createKubernetesClient( master, - Some(namespace), + Some(kubernetesConf.namespace), KUBERNETES_AUTH_SUBMISSION_CONF_PREFIX, sparkConf, None, None)) { kubernetesClient => val client = new Client( - KubernetesDriverBuilder(kubernetesClient, kubernetesConf.sparkConf), kubernetesConf, + KubernetesDriverBuilder(kubernetesClient, kubernetesConf.sparkConf), kubernetesClient, waitForAppCompletion, - appName, - watcher, - kubernetesResourceNamePrefix) + watcher) client.run() } } } - -private[spark] object KubernetesClientApplication { - - def getAppName(conf: SparkConf): String = conf.getOption("spark.app.name").getOrElse("spark") - - def getResourceNamePrefix(appName: String): String = { - val launchTime = System.currentTimeMillis() - s"$appName-$launchTime" - .trim - .toLowerCase(Locale.ROOT) - .replaceAll("\\s+", "-") - .replaceAll("\\.", "-") - .replaceAll("[^a-z0-9\\-]", "") - .replaceAll("-+", "-") - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala index 167fb402cd40..a5ad9729aee9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala @@ -21,57 +21,46 @@ import java.io.File import io.fabric8.kubernetes.client.KubernetesClient import org.apache.spark.SparkConf -import org.apache.spark.deploy.k8s.{Config, KubernetesConf, KubernetesDriverSpec, KubernetesDriverSpecificConf, KubernetesRoleSpecificConf, KubernetesUtils, SparkPod} +import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.features._ private[spark] class KubernetesDriverBuilder( - provideBasicStep: (KubernetesConf[KubernetesDriverSpecificConf]) => BasicDriverFeatureStep = + provideBasicStep: (KubernetesDriverConf => BasicDriverFeatureStep) = new BasicDriverFeatureStep(_), - provideCredentialsStep: (KubernetesConf[KubernetesDriverSpecificConf]) - => DriverKubernetesCredentialsFeatureStep = + provideCredentialsStep: (KubernetesDriverConf => DriverKubernetesCredentialsFeatureStep) = new DriverKubernetesCredentialsFeatureStep(_), - provideServiceStep: (KubernetesConf[KubernetesDriverSpecificConf]) => DriverServiceFeatureStep = + provideServiceStep: (KubernetesDriverConf => DriverServiceFeatureStep) = new DriverServiceFeatureStep(_), - provideSecretsStep: (KubernetesConf[_ <: KubernetesRoleSpecificConf] - => MountSecretsFeatureStep) = + provideSecretsStep: (KubernetesConf => MountSecretsFeatureStep) = new MountSecretsFeatureStep(_), - provideEnvSecretsStep: (KubernetesConf[_ <: KubernetesRoleSpecificConf] - => EnvSecretsFeatureStep) = + provideEnvSecretsStep: (KubernetesConf => EnvSecretsFeatureStep) = new EnvSecretsFeatureStep(_), - provideLocalDirsStep: (KubernetesConf[_ <: KubernetesRoleSpecificConf]) - => LocalDirsFeatureStep = + provideLocalDirsStep: (KubernetesConf => LocalDirsFeatureStep) = new LocalDirsFeatureStep(_), - provideVolumesStep: (KubernetesConf[_ <: KubernetesRoleSpecificConf] - => MountVolumesFeatureStep) = + provideVolumesStep: (KubernetesConf => MountVolumesFeatureStep) = new 
MountVolumesFeatureStep(_), - provideDriverCommandStep: ( - KubernetesConf[KubernetesDriverSpecificConf] - => DriverCommandFeatureStep) = + provideDriverCommandStep: (KubernetesDriverConf => DriverCommandFeatureStep) = new DriverCommandFeatureStep(_), - provideHadoopGlobalStep: ( - KubernetesConf[KubernetesDriverSpecificConf] - => KerberosConfDriverFeatureStep) = - new KerberosConfDriverFeatureStep(_), - providePodTemplateConfigMapStep: (KubernetesConf[_ <: KubernetesRoleSpecificConf] - => PodTemplateConfigMapStep) = - new PodTemplateConfigMapStep(_), - provideInitialPod: () => SparkPod = () => SparkPod.initialPod()) { + provideHadoopGlobalStep: (KubernetesDriverConf => KerberosConfDriverFeatureStep) = + new KerberosConfDriverFeatureStep(_), + providePodTemplateConfigMapStep: (KubernetesConf => PodTemplateConfigMapStep) = + new PodTemplateConfigMapStep(_), + provideInitialPod: () => SparkPod = () => SparkPod.initialPod) { - def buildFromFeatures( - kubernetesConf: KubernetesConf[KubernetesDriverSpecificConf]): KubernetesDriverSpec = { + def buildFromFeatures(kubernetesConf: KubernetesDriverConf): KubernetesDriverSpec = { val baseFeatures = Seq( provideBasicStep(kubernetesConf), provideCredentialsStep(kubernetesConf), provideServiceStep(kubernetesConf), provideLocalDirsStep(kubernetesConf)) - val secretFeature = if (kubernetesConf.roleSecretNamesToMountPaths.nonEmpty) { + val secretFeature = if (kubernetesConf.secretNamesToMountPaths.nonEmpty) { Seq(provideSecretsStep(kubernetesConf)) } else Nil - val envSecretFeature = if (kubernetesConf.roleSecretEnvNamesToKeyRefs.nonEmpty) { + val envSecretFeature = if (kubernetesConf.secretEnvNamesToKeyRefs.nonEmpty) { Seq(provideEnvSecretsStep(kubernetesConf)) } else Nil - val volumesFeature = if (kubernetesConf.roleVolumes.nonEmpty) { + val volumesFeature = if (kubernetesConf.volumes.nonEmpty) { Seq(provideVolumesStep(kubernetesConf)) } else Nil val podTemplateFeature = if ( @@ -81,14 +70,12 @@ private[spark] class KubernetesDriverBuilder( val driverCommandStep = provideDriverCommandStep(kubernetesConf) - val maybeHadoopConfigStep = - kubernetesConf.hadoopConfSpec.map { _ => - provideHadoopGlobalStep(kubernetesConf)} + val hadoopConfigStep = Some(provideHadoopGlobalStep(kubernetesConf)) val allFeatures: Seq[KubernetesFeatureConfigStep] = baseFeatures ++ Seq(driverCommandStep) ++ secretFeature ++ envSecretFeature ++ volumesFeature ++ - maybeHadoopConfigStep.toSeq ++ podTemplateFeature + hadoopConfigStep ++ podTemplateFeature var spec = KubernetesDriverSpec( provideInitialPod(), diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala index fc41a4770bce..d24ff0d1e660 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala @@ -26,50 +26,38 @@ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.features._ private[spark] class KubernetesExecutorBuilder( - provideBasicStep: (KubernetesConf [KubernetesExecutorSpecificConf]) - => BasicExecutorFeatureStep = + provideBasicStep: (KubernetesExecutorConf => BasicExecutorFeatureStep) = new BasicExecutorFeatureStep(_), - provideSecretsStep: (KubernetesConf[_ <: KubernetesRoleSpecificConf]) - 
=> MountSecretsFeatureStep = + provideSecretsStep: (KubernetesConf => MountSecretsFeatureStep) = new MountSecretsFeatureStep(_), - provideEnvSecretsStep: - (KubernetesConf[_ <: KubernetesRoleSpecificConf] => EnvSecretsFeatureStep) = + provideEnvSecretsStep: (KubernetesConf => EnvSecretsFeatureStep) = new EnvSecretsFeatureStep(_), - provideLocalDirsStep: (KubernetesConf[_ <: KubernetesRoleSpecificConf]) - => LocalDirsFeatureStep = + provideLocalDirsStep: (KubernetesConf => LocalDirsFeatureStep) = new LocalDirsFeatureStep(_), - provideVolumesStep: (KubernetesConf[_ <: KubernetesRoleSpecificConf] - => MountVolumesFeatureStep) = + provideVolumesStep: (KubernetesConf => MountVolumesFeatureStep) = new MountVolumesFeatureStep(_), - provideHadoopConfStep: ( - KubernetesConf[KubernetesExecutorSpecificConf] - => HadoopConfExecutorFeatureStep) = + provideHadoopConfStep: (KubernetesExecutorConf => HadoopConfExecutorFeatureStep) = new HadoopConfExecutorFeatureStep(_), - provideKerberosConfStep: ( - KubernetesConf[KubernetesExecutorSpecificConf] - => KerberosConfExecutorFeatureStep) = + provideKerberosConfStep: (KubernetesExecutorConf => KerberosConfExecutorFeatureStep) = new KerberosConfExecutorFeatureStep(_), - provideHadoopSparkUserStep: ( - KubernetesConf[KubernetesExecutorSpecificConf] - => HadoopSparkUserExecutorFeatureStep) = + provideHadoopSparkUserStep: (KubernetesExecutorConf => HadoopSparkUserExecutorFeatureStep) = new HadoopSparkUserExecutorFeatureStep(_), provideInitialPod: () => SparkPod = () => SparkPod.initialPod()) { - def buildFromFeatures( - kubernetesConf: KubernetesConf[KubernetesExecutorSpecificConf]): SparkPod = { + def buildFromFeatures(kubernetesConf: KubernetesExecutorConf): SparkPod = { val sparkConf = kubernetesConf.sparkConf val maybeHadoopConfigMap = sparkConf.getOption(HADOOP_CONFIG_MAP_NAME) val maybeDTSecretName = sparkConf.getOption(KERBEROS_DT_SECRET_NAME) val maybeDTDataItem = sparkConf.getOption(KERBEROS_DT_SECRET_KEY) val baseFeatures = Seq(provideBasicStep(kubernetesConf), provideLocalDirsStep(kubernetesConf)) - val secretFeature = if (kubernetesConf.roleSecretNamesToMountPaths.nonEmpty) { + val secretFeature = if (kubernetesConf.secretNamesToMountPaths.nonEmpty) { Seq(provideSecretsStep(kubernetesConf)) } else Nil - val secretEnvFeature = if (kubernetesConf.roleSecretEnvNamesToKeyRefs.nonEmpty) { + val secretEnvFeature = if (kubernetesConf.secretEnvNamesToKeyRefs.nonEmpty) { Seq(provideEnvSecretsStep(kubernetesConf)) } else Nil - val volumesFeature = if (kubernetesConf.roleVolumes.nonEmpty) { + val volumesFeature = if (kubernetesConf.volumes.nonEmpty) { Seq(provideVolumesStep(kubernetesConf)) } else Nil diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala index 41ca8d186c17..f4d40b0b3590 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala @@ -26,10 +26,6 @@ import org.apache.spark.deploy.k8s.submit._ class KubernetesConfSuite extends SparkFunSuite { - private val APP_NAME = "test-app" - private val RESOURCE_NAME_PREFIX = "prefix" - private val APP_ID = "test-id" - private val MAIN_CLASS = "test-class" private val APP_ARGS = Array("arg1", "arg2") private val CUSTOM_LABELS = Map( "customLabel1Key" -> "customLabel1Value", @@ -49,26 
+45,6 @@ class KubernetesConfSuite extends SparkFunSuite { private val DRIVER_POD = new PodBuilder().build() private val EXECUTOR_ID = "executor-id" - test("Basic driver translated fields.") { - val sparkConf = new SparkConf(false) - val conf = KubernetesConf.createDriverConf( - sparkConf, - APP_NAME, - RESOURCE_NAME_PREFIX, - APP_ID, - mainAppResource = JavaMainAppResource(None), - MAIN_CLASS, - APP_ARGS, - maybePyFiles = None, - hadoopConfDir = None) - assert(conf.appId === APP_ID) - assert(conf.sparkConf.getAll.toMap === sparkConf.getAll.toMap) - assert(conf.appResourceNamePrefix === RESOURCE_NAME_PREFIX) - assert(conf.roleSpecificConf.appName === APP_NAME) - assert(conf.roleSpecificConf.mainClass === MAIN_CLASS) - assert(conf.roleSpecificConf.appArgs === APP_ARGS) - } - test("Resolve driver labels, annotations, secret mount paths, envs, and memory overhead") { val sparkConf = new SparkConf(false) .set(MEMORY_OVERHEAD_FACTOR, 0.3) @@ -90,22 +66,19 @@ class KubernetesConfSuite extends SparkFunSuite { val conf = KubernetesConf.createDriverConf( sparkConf, - APP_NAME, - RESOURCE_NAME_PREFIX, - APP_ID, - mainAppResource = JavaMainAppResource(None), - MAIN_CLASS, + KubernetesTestConf.APP_ID, + JavaMainAppResource(None), + KubernetesTestConf.MAIN_CLASS, APP_ARGS, - maybePyFiles = None, - hadoopConfDir = None) - assert(conf.roleLabels === Map( - SPARK_APP_ID_LABEL -> APP_ID, + None) + assert(conf.labels === Map( + SPARK_APP_ID_LABEL -> KubernetesTestConf.APP_ID, SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) ++ CUSTOM_LABELS) - assert(conf.roleAnnotations === CUSTOM_ANNOTATIONS) - assert(conf.roleSecretNamesToMountPaths === SECRET_NAMES_TO_MOUNT_PATHS) - assert(conf.roleSecretEnvNamesToKeyRefs === SECRET_ENV_VARS) - assert(conf.roleEnvs === CUSTOM_ENVS) + assert(conf.annotations === CUSTOM_ANNOTATIONS) + assert(conf.secretNamesToMountPaths === SECRET_NAMES_TO_MOUNT_PATHS) + assert(conf.secretEnvNamesToKeyRefs === SECRET_ENV_VARS) + assert(conf.environment === CUSTOM_ENVS) assert(conf.sparkConf.get(MEMORY_OVERHEAD_FACTOR) === 0.3) } @@ -113,20 +86,20 @@ class KubernetesConfSuite extends SparkFunSuite { val conf = KubernetesConf.createExecutorConf( new SparkConf(false), EXECUTOR_ID, - APP_ID, + KubernetesTestConf.APP_ID, Some(DRIVER_POD)) - assert(conf.roleSpecificConf.executorId === EXECUTOR_ID) - assert(conf.roleSpecificConf.driverPod.get === DRIVER_POD) + assert(conf.executorId === EXECUTOR_ID) + assert(conf.driverPod.get === DRIVER_POD) } test("Image pull secrets.") { val conf = KubernetesConf.createExecutorConf( new SparkConf(false) - .set(IMAGE_PULL_SECRETS, "my-secret-1,my-secret-2 "), + .set(IMAGE_PULL_SECRETS, Seq("my-secret-1", "my-secret-2 ")), EXECUTOR_ID, - APP_ID, + KubernetesTestConf.APP_ID, Some(DRIVER_POD)) - assert(conf.imagePullSecrets() === + assert(conf.imagePullSecrets === Seq( new LocalObjectReferenceBuilder().withName("my-secret-1").build(), new LocalObjectReferenceBuilder().withName("my-secret-2").build())) @@ -150,14 +123,14 @@ class KubernetesConfSuite extends SparkFunSuite { val conf = KubernetesConf.createExecutorConf( sparkConf, EXECUTOR_ID, - APP_ID, + KubernetesTestConf.APP_ID, Some(DRIVER_POD)) - assert(conf.roleLabels === Map( + assert(conf.labels === Map( SPARK_EXECUTOR_ID_LABEL -> EXECUTOR_ID, - SPARK_APP_ID_LABEL -> APP_ID, + SPARK_APP_ID_LABEL -> KubernetesTestConf.APP_ID, SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++ CUSTOM_LABELS) - assert(conf.roleAnnotations === CUSTOM_ANNOTATIONS) - assert(conf.roleSecretNamesToMountPaths === SECRET_NAMES_TO_MOUNT_PATHS) 
- assert(conf.roleSecretEnvNamesToKeyRefs === SECRET_ENV_VARS) + assert(conf.annotations === CUSTOM_ANNOTATIONS) + assert(conf.secretNamesToMountPaths === SECRET_NAMES_TO_MOUNT_PATHS) + assert(conf.secretEnvNamesToKeyRefs === SECRET_ENV_VARS) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala new file mode 100644 index 000000000000..1d77a6d18152 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.k8s + +import io.fabric8.kubernetes.api.model.Pod + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.deploy.k8s.submit.{JavaMainAppResource, MainAppResource} + +/** + * Builder methods for KubernetesConf that allow easy control over what to return for a few + * properties. For use with tests instead of having to mock specific properties. 
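The scaladoc above introduces the new KubernetesTestConf helper, whose body follows next. A short usage sketch of that helper, mirroring the expectations already exercised in KubernetesConfSuite above; the label and annotation values are purely illustrative.

    import org.apache.spark.deploy.k8s.{Constants, KubernetesTestConf}

    // A test asks only for the properties it cares about; everything else defaults.
    val conf = KubernetesTestConf.createDriverConf(
      labels = Map("label1key" -> "label1value"),
      annotations = Map("customAnnotation" -> "customAnnotationValue"))

    assert(conf.appId === KubernetesTestConf.APP_ID)
    assert(conf.labels(Constants.SPARK_APP_ID_LABEL) === KubernetesTestConf.APP_ID)
    assert(conf.labels("label1key") === "label1value")
    assert(conf.annotations === Map("customAnnotation" -> "customAnnotationValue"))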
+ */ +object KubernetesTestConf { + + val APP_ID = "appId" + val MAIN_CLASS = "mainClass" + val RESOURCE_PREFIX = "prefix" + val EXECUTOR_ID = "1" + + private val DEFAULT_CONF = new SparkConf(false) + + // scalastyle:off argcount + def createDriverConf( + sparkConf: SparkConf = DEFAULT_CONF, + appId: String = APP_ID, + mainAppResource: MainAppResource = JavaMainAppResource(None), + mainClass: String = MAIN_CLASS, + appArgs: Array[String] = Array.empty, + pyFiles: Seq[String] = Nil, + resourceNamePrefix: Option[String] = None, + labels: Map[String, String] = Map.empty, + environment: Map[String, String] = Map.empty, + annotations: Map[String, String] = Map.empty, + secretEnvNamesToKeyRefs: Map[String, String] = Map.empty, + secretNamesToMountPaths: Map[String, String] = Map.empty, + volumes: Seq[KubernetesVolumeSpec] = Seq.empty): KubernetesDriverConf = { + val conf = sparkConf.clone() + + resourceNamePrefix.foreach { prefix => + conf.set(KUBERNETES_DRIVER_POD_NAME_PREFIX, prefix) + } + setPrefixedConfigs(conf, KUBERNETES_DRIVER_LABEL_PREFIX, labels) + setPrefixedConfigs(conf, KUBERNETES_DRIVER_ENV_PREFIX, environment) + setPrefixedConfigs(conf, KUBERNETES_DRIVER_ANNOTATION_PREFIX, annotations) + setPrefixedConfigs(conf, KUBERNETES_DRIVER_SECRETS_PREFIX, secretNamesToMountPaths) + setPrefixedConfigs(conf, KUBERNETES_DRIVER_SECRET_KEY_REF_PREFIX, secretEnvNamesToKeyRefs) + setVolumeSpecs(conf, KUBERNETES_DRIVER_VOLUMES_PREFIX, volumes) + + new KubernetesDriverConf(conf, appId, mainAppResource, mainClass, appArgs, pyFiles) + } + // scalastyle:on argcount + + def createExecutorConf( + sparkConf: SparkConf = DEFAULT_CONF, + driverPod: Option[Pod] = None, + labels: Map[String, String] = Map.empty, + environment: Map[String, String] = Map.empty, + annotations: Map[String, String] = Map.empty, + secretEnvNamesToKeyRefs: Map[String, String] = Map.empty, + secretNamesToMountPaths: Map[String, String] = Map.empty, + volumes: Seq[KubernetesVolumeSpec] = Seq.empty): KubernetesExecutorConf = { + val conf = sparkConf.clone() + + setPrefixedConfigs(conf, KUBERNETES_EXECUTOR_LABEL_PREFIX, labels) + setPrefixedConfigs(conf, "spark.executorEnv.", environment) + setPrefixedConfigs(conf, KUBERNETES_EXECUTOR_ANNOTATION_PREFIX, annotations) + setPrefixedConfigs(conf, KUBERNETES_EXECUTOR_SECRETS_PREFIX, secretNamesToMountPaths) + setPrefixedConfigs(conf, KUBERNETES_EXECUTOR_SECRET_KEY_REF_PREFIX, secretEnvNamesToKeyRefs) + setVolumeSpecs(conf, KUBERNETES_EXECUTOR_VOLUMES_PREFIX, volumes) + + new KubernetesExecutorConf(conf, APP_ID, EXECUTOR_ID, driverPod) + } + + private def setPrefixedConfigs( + conf: SparkConf, + prefix: String, + values: Map[String, String]): Unit = { + values.foreach { case (k, v) => + conf.set(s"${prefix}$k", v) + } + } + + private def setVolumeSpecs( + conf: SparkConf, + prefix: String, + volumes: Seq[KubernetesVolumeSpec]): Unit = { + def key(vtype: String, vname: String, subkey: String): String = { + s"${prefix}$vtype.$vname.$subkey" + } + + volumes.foreach { case spec => + val (vtype, configs) = spec.volumeConf match { + case KubernetesHostPathVolumeConf(path) => + (KUBERNETES_VOLUMES_HOSTPATH_TYPE, + Map(KUBERNETES_VOLUMES_OPTIONS_PATH_KEY -> path)) + + case KubernetesPVCVolumeConf(claimName) => + (KUBERNETES_VOLUMES_PVC_TYPE, + Map(KUBERNETES_VOLUMES_OPTIONS_CLAIM_NAME_KEY -> claimName)) + + case KubernetesEmptyDirVolumeConf(medium, sizeLimit) => + val mconf = medium.map { m => (KUBERNETES_VOLUMES_OPTIONS_MEDIUM_KEY, m) }.toMap + val lconf = sizeLimit.map { l => 
(KUBERNETES_VOLUMES_OPTIONS_SIZE_LIMIT_KEY, l) }.toMap + (KUBERNETES_VOLUMES_EMPTYDIR_TYPE, mconf ++ lconf) + } + + conf.set(key(vtype, spec.volumeName, KUBERNETES_VOLUMES_MOUNT_PATH_KEY), spec.mountPath) + if (spec.mountSubPath.nonEmpty) { + conf.set(key(vtype, spec.volumeName, KUBERNETES_VOLUMES_MOUNT_SUBPATH_KEY), + spec.mountSubPath) + } + conf.set(key(vtype, spec.volumeName, KUBERNETES_VOLUMES_MOUNT_READONLY_KEY), + spec.mountReadOnly.toString) + configs.foreach { case (k, v) => + conf.set(key(vtype, spec.volumeName, k), v) + } + } + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala index de79a58a3a75..c0790898e097 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala @@ -25,7 +25,7 @@ class KubernetesVolumeUtilsSuite extends SparkFunSuite { sparkConf.set("test.hostPath.volumeName.mount.readOnly", "true") sparkConf.set("test.hostPath.volumeName.options.path", "/hostPath") - val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head.get + val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head assert(volumeSpec.volumeName === "volumeName") assert(volumeSpec.mountPath === "/path") assert(volumeSpec.mountReadOnly === true) @@ -39,7 +39,7 @@ class KubernetesVolumeUtilsSuite extends SparkFunSuite { sparkConf.set("test.emptyDir.volumeName.mount.readOnly", "true") sparkConf.set("test.emptyDir.volumeName.mount.subPath", "subPath") - val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head.get + val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head assert(volumeSpec.volumeName === "volumeName") assert(volumeSpec.mountPath === "/path") assert(volumeSpec.mountSubPath === "subPath") @@ -51,7 +51,7 @@ class KubernetesVolumeUtilsSuite extends SparkFunSuite { sparkConf.set("test.persistentVolumeClaim.volumeName.mount.readOnly", "true") sparkConf.set("test.persistentVolumeClaim.volumeName.options.claimName", "claimeName") - val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head.get + val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head assert(volumeSpec.volumeName === "volumeName") assert(volumeSpec.mountPath === "/path") assert(volumeSpec.mountReadOnly === true) @@ -66,7 +66,7 @@ class KubernetesVolumeUtilsSuite extends SparkFunSuite { sparkConf.set("test.emptyDir.volumeName.options.medium", "medium") sparkConf.set("test.emptyDir.volumeName.options.sizeLimit", "5G") - val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head.get + val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head assert(volumeSpec.volumeName === "volumeName") assert(volumeSpec.mountPath === "/path") assert(volumeSpec.mountReadOnly === true) @@ -79,7 +79,7 @@ class KubernetesVolumeUtilsSuite extends SparkFunSuite { sparkConf.set("test.emptyDir.volumeName.mount.path", "/path") sparkConf.set("test.emptyDir.volumeName.mount.readOnly", "true") - val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head.get + val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, 
"test.").head assert(volumeSpec.volumeName === "volumeName") assert(volumeSpec.mountPath === "/path") assert(volumeSpec.mountReadOnly === true) @@ -92,27 +92,29 @@ class KubernetesVolumeUtilsSuite extends SparkFunSuite { sparkConf.set("test.hostPath.volumeName.mount.path", "/path") sparkConf.set("test.hostPath.volumeName.options.path", "/hostPath") - val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head.get + val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head assert(volumeSpec.mountReadOnly === false) } - test("Gracefully fails on missing mount key") { + test("Fails on missing mount key") { val sparkConf = new SparkConf(false) sparkConf.set("test.emptyDir.volumeName.mnt.path", "/path") - val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head - assert(volumeSpec.isFailure === true) - assert(volumeSpec.failed.get.getMessage === "emptyDir.volumeName.mount.path") + val e = intercept[NoSuchElementException] { + KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.") + } + assert(e.getMessage.contains("emptyDir.volumeName.mount.path")) } - test("Gracefully fails on missing option key") { + test("Fails on missing option key") { val sparkConf = new SparkConf(false) sparkConf.set("test.hostPath.volumeName.mount.path", "/path") sparkConf.set("test.hostPath.volumeName.mount.readOnly", "true") sparkConf.set("test.hostPath.volumeName.options.pth", "/hostPath") - val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head - assert(volumeSpec.isFailure === true) - assert(volumeSpec.failed.get.getMessage === "hostPath.volumeName.options.path") + val e = intercept[NoSuchElementException] { + KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.") + } + assert(e.getMessage.contains("hostPath.volumeName.options.path")) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index 1e7dfbeffdb2..e4951bc1e69e 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -21,7 +21,7 @@ import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model.{ContainerPort, ContainerPortBuilder, LocalObjectReferenceBuilder} import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit._ @@ -30,32 +30,17 @@ import org.apache.spark.ui.SparkUI class BasicDriverFeatureStepSuite extends SparkFunSuite { - private val APP_ID = "spark-app-id" - private val RESOURCE_NAME_PREFIX = "spark" private val DRIVER_LABELS = Map("labelkey" -> "labelvalue") private val CONTAINER_IMAGE_PULL_POLICY = "IfNotPresent" - private val APP_NAME = "spark-test" - private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" - private val PY_MAIN_CLASS = "org.apache.spark.deploy.PythonRunner" - private val APP_ARGS = Array("arg1", "arg2", "\"arg 3\"") - private val CUSTOM_ANNOTATION_KEY = "customAnnotation" - private val 
CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" - private val DRIVER_ANNOTATIONS = Map(CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE) - private val DRIVER_CUSTOM_ENV1 = "customDriverEnv1" - private val DRIVER_CUSTOM_ENV2 = "customDriverEnv2" + private val DRIVER_ANNOTATIONS = Map("customAnnotation" -> "customAnnotationValue") private val DRIVER_ENVS = Map( - DRIVER_CUSTOM_ENV1 -> DRIVER_CUSTOM_ENV1, - DRIVER_CUSTOM_ENV2 -> DRIVER_CUSTOM_ENV2) + "customDriverEnv1" -> "customDriverEnv2", + "customDriverEnv2" -> "customDriverEnv2") private val TEST_IMAGE_PULL_SECRETS = Seq("my-secret-1", "my-secret-2") private val TEST_IMAGE_PULL_SECRET_OBJECTS = TEST_IMAGE_PULL_SECRETS.map { secret => new LocalObjectReferenceBuilder().withName(secret).build() } - private val emptyDriverSpecificConf = KubernetesDriverSpecificConf( - JavaMainAppResource(None), - APP_NAME, - MAIN_CLASS, - APP_ARGS) test("Check the pod respects all configurations from the user.") { val sparkConf = new SparkConf() @@ -65,19 +50,12 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { .set(DRIVER_MEMORY.key, "256M") .set(DRIVER_MEMORY_OVERHEAD, 200L) .set(CONTAINER_IMAGE, "spark-driver:latest") - .set(IMAGE_PULL_SECRETS, TEST_IMAGE_PULL_SECRETS.mkString(",")) - val kubernetesConf = KubernetesConf( - sparkConf, - emptyDriverSpecificConf, - RESOURCE_NAME_PREFIX, - APP_ID, - DRIVER_LABELS, - DRIVER_ANNOTATIONS, - Map.empty, - Map.empty, - DRIVER_ENVS, - Nil, - hadoopConfSpec = None) + .set(IMAGE_PULL_SECRETS, TEST_IMAGE_PULL_SECRETS) + val kubernetesConf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf, + labels = DRIVER_LABELS, + environment = DRIVER_ENVS, + annotations = DRIVER_ANNOTATIONS) val featureStep = new BasicDriverFeatureStep(kubernetesConf) val basePod = SparkPod.initialPod() @@ -99,10 +77,11 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { val envs = configuredPod.container .getEnv .asScala - .map(env => (env.getName, env.getValue)) + .map { env => (env.getName, env.getValue) } .toMap - assert(envs(DRIVER_CUSTOM_ENV1) === DRIVER_ENVS(DRIVER_CUSTOM_ENV1)) - assert(envs(DRIVER_CUSTOM_ENV2) === DRIVER_ENVS(DRIVER_CUSTOM_ENV2)) + DRIVER_ENVS.foreach { case (k, v) => + assert(envs(v) === v) + } assert(configuredPod.pod.getSpec().getImagePullSecrets.asScala === TEST_IMAGE_PULL_SECRET_OBJECTS) @@ -122,13 +101,15 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { val driverPodMetadata = configuredPod.pod.getMetadata assert(driverPodMetadata.getName === "spark-driver-pod") - assert(driverPodMetadata.getLabels.asScala === DRIVER_LABELS) + DRIVER_LABELS.foreach { case (k, v) => + assert(driverPodMetadata.getLabels.get(k) === v) + } assert(driverPodMetadata.getAnnotations.asScala === DRIVER_ANNOTATIONS) assert(configuredPod.pod.getSpec.getRestartPolicy === "Never") val expectedSparkConf = Map( KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod", - "spark.app.id" -> APP_ID, - KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> RESOURCE_NAME_PREFIX, + "spark.app.id" -> KubernetesTestConf.APP_ID, + KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> kubernetesConf.resourceNamePrefix, "spark.kubernetes.submitInDriver" -> "true", MEMORY_OVERHEAD_FACTOR.key -> MEMORY_OVERHEAD_FACTOR.defaultValue.get.toString) assert(featureStep.getAdditionalPodSystemProperties() === expectedSparkConf) @@ -141,39 +122,10 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { val pythonSparkConf = new SparkConf() .set(DRIVER_MEMORY.key, "4g") .set(CONTAINER_IMAGE, "spark-driver-py:latest") - val javaKubernetesConf = 
KubernetesConf( - javaSparkConf, - KubernetesDriverSpecificConf( - JavaMainAppResource(None), - APP_NAME, - PY_MAIN_CLASS, - APP_ARGS), - RESOURCE_NAME_PREFIX, - APP_ID, - DRIVER_LABELS, - DRIVER_ANNOTATIONS, - Map.empty, - Map.empty, - DRIVER_ENVS, - Nil, - hadoopConfSpec = None) - - val pythonKubernetesConf = KubernetesConf( - pythonSparkConf, - KubernetesDriverSpecificConf( - PythonMainAppResource(""), - APP_NAME, - PY_MAIN_CLASS, - APP_ARGS), - RESOURCE_NAME_PREFIX, - APP_ID, - DRIVER_LABELS, - DRIVER_ANNOTATIONS, - Map.empty, - Map.empty, - DRIVER_ENVS, - Nil, - hadoopConfSpec = None) + val javaKubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = javaSparkConf) + val pythonKubernetesConf = KubernetesTestConf.createDriverConf( + sparkConf = pythonSparkConf, + mainAppResource = PythonMainAppResource("")) val javaFeatureStep = new BasicDriverFeatureStep(javaKubernetesConf) val pythonFeatureStep = new BasicDriverFeatureStep(pythonKubernetesConf) val basePod = SparkPod.initialPod() @@ -191,25 +143,14 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { .setJars(allJars) .set("spark.files", allFiles.mkString(",")) .set(CONTAINER_IMAGE, "spark-driver:latest") - val kubernetesConf = KubernetesConf( - sparkConf, - emptyDriverSpecificConf, - RESOURCE_NAME_PREFIX, - APP_ID, - DRIVER_LABELS, - DRIVER_ANNOTATIONS, - Map.empty, - Map.empty, - DRIVER_ENVS, - Nil, - hadoopConfSpec = None) + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) val step = new BasicDriverFeatureStep(kubernetesConf) val additionalProperties = step.getAdditionalPodSystemProperties() val expectedSparkConf = Map( KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod", - "spark.app.id" -> APP_ID, - KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> RESOURCE_NAME_PREFIX, + "spark.app.id" -> KubernetesTestConf.APP_ID, + KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> kubernetesConf.resourceNamePrefix, "spark.kubernetes.submitInDriver" -> "true", "spark.jars" -> "/opt/spark/jar1.jar,hdfs:///opt/spark/jar2.jar", "spark.files" -> "https://localhost:9000/file1.txt,/opt/spark/file2.txt", @@ -234,19 +175,9 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { .set(CONTAINER_IMAGE, "spark-driver:latest") .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") factor.foreach { value => sparkConf.set(MEMORY_OVERHEAD_FACTOR, value) } - val driverConf = emptyDriverSpecificConf.copy(mainAppResource = resource) - val conf = KubernetesConf( - sparkConf, - driverConf, - RESOURCE_NAME_PREFIX, - APP_ID, - DRIVER_LABELS, - DRIVER_ANNOTATIONS, - Map.empty, - Map.empty, - DRIVER_ENVS, - Nil, - hadoopConfSpec = None) + val conf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf, + mainAppResource = resource) val step = new BasicDriverFeatureStep(conf) val pod = step.configurePod(SparkPod.initialPod()) val mem = pod.container.getResources.getRequests.get("memory").getAmount() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index e9a16aab6ccc..d6003c977937 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -19,20 +19,18 @@ package org.apache.spark.deploy.k8s.features import 
scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model._ -import org.mockito.MockitoAnnotations -import org.scalatest.{BeforeAndAfter, BeforeAndAfterEach} +import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.internal.config._ import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend -class BasicExecutorFeatureStepSuite - extends SparkFunSuite with BeforeAndAfter with BeforeAndAfterEach { +class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { - private val APP_ID = "app-id" private val DRIVER_HOSTNAME = "localhost" private val DRIVER_PORT = 7098 private val DRIVER_ADDRESS = RpcEndpointAddress( @@ -45,7 +43,6 @@ class BasicExecutorFeatureStepSuite private val RESOURCE_NAME_PREFIX = "base" private val EXECUTOR_IMAGE = "executor-image" private val LABELS = Map("label1key" -> "label1value") - private val ANNOTATIONS = Map("annotation1key" -> "annotation1value") private val TEST_IMAGE_PULL_SECRETS = Seq("my-1secret-1", "my-secret-2") private val TEST_IMAGE_PULL_SECRET_OBJECTS = TEST_IMAGE_PULL_SECRETS.map { secret => @@ -66,37 +63,35 @@ class BasicExecutorFeatureStepSuite private var baseConf: SparkConf = _ before { - MockitoAnnotations.initMocks(this) baseConf = new SparkConf() .set(KUBERNETES_DRIVER_POD_NAME, DRIVER_POD_NAME) .set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, RESOURCE_NAME_PREFIX) .set(CONTAINER_IMAGE, EXECUTOR_IMAGE) .set(KUBERNETES_DRIVER_SUBMIT_CHECK, true) - .set("spark.driver.host", DRIVER_HOSTNAME) + .set(DRIVER_HOST_ADDRESS, DRIVER_HOSTNAME) .set("spark.driver.port", DRIVER_PORT.toString) - .set(IMAGE_PULL_SECRETS, TEST_IMAGE_PULL_SECRETS.mkString(",")) + .set(IMAGE_PULL_SECRETS, TEST_IMAGE_PULL_SECRETS) .set("spark.kubernetes.resource.type", "java") } + private def newExecutorConf( + environment: Map[String, String] = Map.empty): KubernetesExecutorConf = { + KubernetesTestConf.createExecutorConf( + sparkConf = baseConf, + driverPod = Some(DRIVER_POD), + labels = LABELS, + environment = environment) + } + test("basic executor pod has reasonable defaults") { - val step = new BasicExecutorFeatureStep( - KubernetesConf( - baseConf, - KubernetesExecutorSpecificConf("1", Some(DRIVER_POD)), - RESOURCE_NAME_PREFIX, - APP_ID, - LABELS, - ANNOTATIONS, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None)) + val step = new BasicExecutorFeatureStep(newExecutorConf()) val executor = step.configurePod(SparkPod.initialPod()) // The executor pod name and default labels. assert(executor.pod.getMetadata.getName === s"$RESOURCE_NAME_PREFIX-exec-1") - assert(executor.pod.getMetadata.getLabels.asScala === LABELS) + LABELS.foreach { case (k, v) => + assert(executor.pod.getMetadata.getLabels.get(k) === v) + } assert(executor.pod.getSpec.getImagePullSecrets.asScala === TEST_IMAGE_PULL_SECRET_OBJECTS) // There is exactly 1 container with no volume mounts and default memory limits. 
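For readers checking the "default memory limits" mentioned just above: the expected container request is executor memory plus the memory overhead, which for the defaults used in this suite is the usual Spark calculation of ten percent of executor memory with a 384 MiB floor (the exact constants live in the overhead configs, not in this patch). The numbers below reproduce the 1408 + 42 = 1450 arithmetic quoted in the pyspark memory test further down, where the result is asserted as "1450Mi".

    // Worked numbers only, not test code.
    val executorMemoryMiB = 1024                                        // spark.executor.memory default 1g
    val overheadMiB = math.max((0.10 * executorMemoryMiB).toInt, 384)   // 10% with a 384 MiB floor => 384
    val baseRequestMiB = executorMemoryMiB + overheadMiB                // 1024 + 384 = 1408
    val withPySparkMiB = baseRequestMiB + 42                            // 1408 + 42 = 1450 -> "1450Mi"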
@@ -116,43 +111,18 @@ class BasicExecutorFeatureStepSuite } test("executor pod hostnames get truncated to 63 characters") { - val conf = baseConf.clone() val longPodNamePrefix = "loremipsumdolorsitametvimatelitrefficiendisuscipianturvixlegeresple" - val step = new BasicExecutorFeatureStep( - KubernetesConf( - conf, - KubernetesExecutorSpecificConf("1", Some(DRIVER_POD)), - longPodNamePrefix, - APP_ID, - LABELS, - ANNOTATIONS, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None)) + baseConf.set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, longPodNamePrefix) + val step = new BasicExecutorFeatureStep(newExecutorConf()) assert(step.configurePod(SparkPod.initialPod()).pod.getSpec.getHostname.length === 63) } test("classpath and extra java options get translated into environment variables") { - val conf = baseConf.clone() - conf.set(org.apache.spark.internal.config.EXECUTOR_JAVA_OPTIONS, "foo=bar") - conf.set(org.apache.spark.internal.config.EXECUTOR_CLASS_PATH, "bar=baz") - - val step = new BasicExecutorFeatureStep( - KubernetesConf( - conf, - KubernetesExecutorSpecificConf("1", Some(DRIVER_POD)), - RESOURCE_NAME_PREFIX, - APP_ID, - LABELS, - ANNOTATIONS, - Map.empty, - Map.empty, - Map("qux" -> "quux"), - Nil, - hadoopConfSpec = None)) + baseConf.set(EXECUTOR_JAVA_OPTIONS, "foo=bar") + baseConf.set(EXECUTOR_CLASS_PATH, "bar=baz") + val kconf = newExecutorConf(environment = Map("qux" -> "quux")) + val step = new BasicExecutorFeatureStep(kconf) val executor = step.configurePod(SparkPod.initialPod()) checkEnv(executor, @@ -163,23 +133,10 @@ class BasicExecutorFeatureStepSuite } test("test executor pyspark memory") { - val conf = baseConf.clone() - conf.set("spark.kubernetes.resource.type", "python") - conf.set(org.apache.spark.internal.config.PYSPARK_EXECUTOR_MEMORY, 42L) - - val step = new BasicExecutorFeatureStep( - KubernetesConf( - conf, - KubernetesExecutorSpecificConf("1", Some(DRIVER_POD)), - RESOURCE_NAME_PREFIX, - APP_ID, - LABELS, - ANNOTATIONS, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None)) + baseConf.set("spark.kubernetes.resource.type", "python") + baseConf.set(PYSPARK_EXECUTOR_MEMORY, 42L) + + val step = new BasicExecutorFeatureStep(newExecutorConf()) val executor = step.configurePod(SparkPod.initialPod()) // This is checking that basic executor + executorMemory = 1408 + 42 = 1450 assert(executor.container.getResources.getRequests.get("memory").getAmount === "1450Mi") @@ -199,7 +156,7 @@ class BasicExecutorFeatureStepSuite ENV_DRIVER_URL -> DRIVER_ADDRESS.toString, ENV_EXECUTOR_CORES -> "1", ENV_EXECUTOR_MEMORY -> "1g", - ENV_APPLICATION_ID -> APP_ID, + ENV_APPLICATION_ID -> KubernetesTestConf.APP_ID, ENV_SPARK_CONF_DIR -> SPARK_CONF_DIR_INTERNAL, ENV_EXECUTOR_POD_IP -> null) ++ additionalEnvVars diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala index 30672952aaf6..f74ac928028c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala @@ -27,8 +27,6 @@ import org.apache.spark.util.Utils class DriverCommandFeatureStepSuite extends SparkFunSuite { - private val MAIN_CLASS = "mainClass" - test("java resource") { val mainResource = 
"local:///main.jar" val spec = applyFeatureStep( @@ -37,7 +35,7 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { assert(spec.pod.container.getArgs.asScala === List( "driver", "--properties-file", SPARK_CONF_PATH, - "--class", MAIN_CLASS, + "--class", KubernetesTestConf.MAIN_CLASS, "spark-internal", "5", "7")) val jars = Utils.stringToSeq(spec.systemProperties("spark.jars")) @@ -55,7 +53,7 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { assert(spec.pod.container.getArgs.asScala === List( "driver", "--properties-file", SPARK_CONF_PATH, - "--class", MAIN_CLASS, + "--class", KubernetesTestConf.MAIN_CLASS, "/main.py")) val envs = spec.pod.container.getEnv.asScala .map { env => (env.getName, env.getValue) } @@ -86,7 +84,7 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { assert(spec.pod.container.getArgs.asScala === List( "driver", "--properties-file", SPARK_CONF_PATH, - "--class", MAIN_CLASS, + "--class", KubernetesTestConf.MAIN_CLASS, "/main.py", "5", "7", "9")) val envs = spec.pod.container.getEnv.asScala @@ -112,7 +110,7 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { assert(spec.pod.container.getArgs.asScala === List( "driver", "--properties-file", SPARK_CONF_PATH, - "--class", MAIN_CLASS, + "--class", KubernetesTestConf.MAIN_CLASS, "/main.R", "5", "7", "9")) } @@ -121,20 +119,11 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { conf: SparkConf = new SparkConf(false), appArgs: Array[String] = Array(), pyFiles: Seq[String] = Nil): KubernetesDriverSpec = { - val driverConf = new KubernetesDriverSpecificConf( - resource, MAIN_CLASS, "appName", appArgs, pyFiles = pyFiles) - val kubernetesConf = KubernetesConf( - conf, - driverConf, - "resource-prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None) + val kubernetesConf = KubernetesTestConf.createDriverConf( + sparkConf = conf, + mainAppResource = resource, + appArgs = appArgs, + pyFiles = pyFiles) val step = new DriverCommandFeatureStep(kubernetesConf) val pod = step.configurePod(SparkPod.initialPod()) val props = step.getAdditionalPodSystemProperties() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala index 36c6616a87b0..7d8e9296a6cb 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala @@ -18,51 +18,25 @@ package org.apache.spark.deploy.k8s.features import java.io.File +import scala.collection.JavaConverters._ + import com.google.common.base.Charsets import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, Secret} -import org.mockito.{Mock, MockitoAnnotations} -import org.scalatest.BeforeAndAfter -import scala.collection.JavaConverters._ +import io.fabric8.kubernetes.api.model.Secret import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import 
org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.util.Utils -class DriverKubernetesCredentialsFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { +class DriverKubernetesCredentialsFeatureStepSuite extends SparkFunSuite { - private val KUBERNETES_RESOURCE_NAME_PREFIX = "spark" - private val APP_ID = "k8s-app" - private var credentialsTempDirectory: File = _ + private val credentialsTempDirectory = Utils.createTempDir() private val BASE_DRIVER_POD = SparkPod.initialPod() - @Mock - private var driverSpecificConf: KubernetesDriverSpecificConf = _ - - before { - MockitoAnnotations.initMocks(this) - credentialsTempDirectory = Utils.createTempDir() - } - - after { - credentialsTempDirectory.delete() - } - test("Don't set any credentials") { - val kubernetesConf = KubernetesConf( - new SparkConf(false), - driverSpecificConf, - KUBERNETES_RESOURCE_NAME_PREFIX, - APP_ID, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None) + val kubernetesConf = KubernetesTestConf.createDriverConf() val kubernetesCredentialsStep = new DriverKubernetesCredentialsFeatureStep(kubernetesConf) assert(kubernetesCredentialsStep.configurePod(BASE_DRIVER_POD) === BASE_DRIVER_POD) assert(kubernetesCredentialsStep.getAdditionalPodSystemProperties().isEmpty) @@ -83,19 +57,7 @@ class DriverKubernetesCredentialsFeatureStepSuite extends SparkFunSuite with Bef .set( s"$KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", "/mnt/secrets/my-ca.pem") - val kubernetesConf = KubernetesConf( - submissionSparkConf, - driverSpecificConf, - KUBERNETES_RESOURCE_NAME_PREFIX, - APP_ID, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None) - + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = submissionSparkConf) val kubernetesCredentialsStep = new DriverKubernetesCredentialsFeatureStep(kubernetesConf) assert(kubernetesCredentialsStep.configurePod(BASE_DRIVER_POD) === BASE_DRIVER_POD) assert(kubernetesCredentialsStep.getAdditionalKubernetesResources().isEmpty) @@ -122,18 +84,7 @@ class DriverKubernetesCredentialsFeatureStepSuite extends SparkFunSuite with Bef .set( s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", caCertFile.getAbsolutePath) - val kubernetesConf = KubernetesConf( - submissionSparkConf, - driverSpecificConf, - KUBERNETES_RESOURCE_NAME_PREFIX, - APP_ID, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None) + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = submissionSparkConf) val kubernetesCredentialsStep = new DriverKubernetesCredentialsFeatureStep(kubernetesConf) val resolvedProperties = kubernetesCredentialsStep.getAdditionalPodSystemProperties() val expectedSparkConf = Map( @@ -153,7 +104,7 @@ class DriverKubernetesCredentialsFeatureStepSuite extends SparkFunSuite with Bef .head .asInstanceOf[Secret] assert(credentialsSecret.getMetadata.getName === - s"$KUBERNETES_RESOURCE_NAME_PREFIX-kubernetes-credentials") + s"${kubernetesConf.resourceNamePrefix}-kubernetes-credentials") val decodedSecretData = credentialsSecret.getData.asScala.map { data => (data._1, new String(BaseEncoding.base64().decode(data._2), Charsets.UTF_8)) } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala index 
3c46667c3042..045278939dff 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala @@ -16,24 +16,19 @@ */ package org.apache.spark.deploy.k8s.features -import io.fabric8.kubernetes.api.model.Service -import org.mockito.{Mock, MockitoAnnotations} -import org.mockito.Mockito.when -import org.scalatest.BeforeAndAfter import scala.collection.JavaConverters._ +import io.fabric8.kubernetes.api.model.Service + import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit.JavaMainAppResource -import org.apache.spark.util.Clock - -class DriverServiceFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { +import org.apache.spark.internal.config._ +import org.apache.spark.util.ManualClock - private val SHORT_RESOURCE_NAME_PREFIX = - "a" * (DriverServiceFeatureStep.MAX_SERVICE_NAME_LENGTH - - DriverServiceFeatureStep.DRIVER_SVC_POSTFIX.length) +class DriverServiceFeatureStepSuite extends SparkFunSuite { private val LONG_RESOURCE_NAME_PREFIX = "a" * (DriverServiceFeatureStep.MAX_SERVICE_NAME_LENGTH - @@ -42,34 +37,14 @@ class DriverServiceFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { "label1key" -> "label1value", "label2key" -> "label2value") - @Mock - private var clock: Clock = _ - - private var sparkConf: SparkConf = _ - - before { - MockitoAnnotations.initMocks(this) - sparkConf = new SparkConf(false) - } - test("Headless service has a port for the driver RPC and the block manager.") { - sparkConf = sparkConf + val sparkConf = new SparkConf(false) .set("spark.driver.port", "9000") - .set(org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT, 8080) - val configurationStep = new DriverServiceFeatureStep( - KubernetesConf( - sparkConf, - KubernetesDriverSpecificConf( - JavaMainAppResource(None), "main", "app", Seq.empty), - SHORT_RESOURCE_NAME_PREFIX, - "app-id", - DRIVER_LABELS, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None)) + .set(DRIVER_BLOCK_MANAGER_PORT, 8080) + val kconf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf, + labels = DRIVER_LABELS) + val configurationStep = new DriverServiceFeatureStep(kconf) assert(configurationStep.configurePod(SparkPod.initialPod()) === SparkPod.initialPod()) assert(configurationStep.getAdditionalKubernetesResources().size === 1) assert(configurationStep.getAdditionalKubernetesResources().head.isInstanceOf[Service]) @@ -80,50 +55,28 @@ class DriverServiceFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { verifyService( 9000, 8080, - s"$SHORT_RESOURCE_NAME_PREFIX${DriverServiceFeatureStep.DRIVER_SVC_POSTFIX}", + s"${kconf.resourceNamePrefix}${DriverServiceFeatureStep.DRIVER_SVC_POSTFIX}", driverService) } test("Hostname and ports are set according to the service name.") { - val configurationStep = new DriverServiceFeatureStep( - KubernetesConf( - sparkConf - .set("spark.driver.port", "9000") - .set(org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT, 8080) - .set(KUBERNETES_NAMESPACE, "my-namespace"), - KubernetesDriverSpecificConf( - JavaMainAppResource(None), "main", "app", Seq.empty), - 
SHORT_RESOURCE_NAME_PREFIX, - "app-id", - DRIVER_LABELS, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None)) - val expectedServiceName = SHORT_RESOURCE_NAME_PREFIX + - DriverServiceFeatureStep.DRIVER_SVC_POSTFIX + val sparkConf = new SparkConf(false) + .set("spark.driver.port", "9000") + .set(DRIVER_BLOCK_MANAGER_PORT, 8080) + .set(KUBERNETES_NAMESPACE, "my-namespace") + val kconf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf, + labels = DRIVER_LABELS) + val configurationStep = new DriverServiceFeatureStep(kconf) + val expectedServiceName = kconf.resourceNamePrefix + DriverServiceFeatureStep.DRIVER_SVC_POSTFIX val expectedHostName = s"$expectedServiceName.my-namespace.svc" val additionalProps = configurationStep.getAdditionalPodSystemProperties() verifySparkConfHostNames(additionalProps, expectedHostName) } test("Ports should resolve to defaults in SparkConf and in the service.") { - val configurationStep = new DriverServiceFeatureStep( - KubernetesConf( - sparkConf, - KubernetesDriverSpecificConf( - JavaMainAppResource(None), "main", "app", Seq.empty), - SHORT_RESOURCE_NAME_PREFIX, - "app-id", - DRIVER_LABELS, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None)) + val kconf = KubernetesTestConf.createDriverConf(labels = DRIVER_LABELS) + val configurationStep = new DriverServiceFeatureStep(kconf) val resolvedService = configurationStep .getAdditionalKubernetesResources() .head @@ -131,30 +84,23 @@ class DriverServiceFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { verifyService( DEFAULT_DRIVER_PORT, DEFAULT_BLOCKMANAGER_PORT, - s"$SHORT_RESOURCE_NAME_PREFIX${DriverServiceFeatureStep.DRIVER_SVC_POSTFIX}", + s"${kconf.resourceNamePrefix}${DriverServiceFeatureStep.DRIVER_SVC_POSTFIX}", resolvedService) val additionalProps = configurationStep.getAdditionalPodSystemProperties() assert(additionalProps("spark.driver.port") === DEFAULT_DRIVER_PORT.toString) - assert(additionalProps(org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT.key) - === DEFAULT_BLOCKMANAGER_PORT.toString) + assert(additionalProps(DRIVER_BLOCK_MANAGER_PORT.key) === DEFAULT_BLOCKMANAGER_PORT.toString) } test("Long prefixes should switch to using a generated name.") { - when(clock.getTimeMillis()).thenReturn(10000) + val clock = new ManualClock() + clock.setTime(10000) + val sparkConf = new SparkConf(false) + .set(KUBERNETES_NAMESPACE, "my-namespace") val configurationStep = new DriverServiceFeatureStep( - KubernetesConf( - sparkConf.set(KUBERNETES_NAMESPACE, "my-namespace"), - KubernetesDriverSpecificConf( - JavaMainAppResource(None), "main", "app", Seq.empty), - LONG_RESOURCE_NAME_PREFIX, - "app-id", - DRIVER_LABELS, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None), + KubernetesTestConf.createDriverConf( + sparkConf = sparkConf, + resourceNamePrefix = Some(LONG_RESOURCE_NAME_PREFIX), + labels = DRIVER_LABELS), clock) val driverService = configurationStep .getAdditionalKubernetesResources() @@ -168,56 +114,27 @@ class DriverServiceFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { } test("Disallow bind address and driver host to be set explicitly.") { - try { - new DriverServiceFeatureStep( - KubernetesConf( - sparkConf.set(org.apache.spark.internal.config.DRIVER_BIND_ADDRESS, "host"), - KubernetesDriverSpecificConf( - JavaMainAppResource(None), "main", "app", Seq.empty), - LONG_RESOURCE_NAME_PREFIX, - "app-id", - DRIVER_LABELS, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - 
Nil, - hadoopConfSpec = None), - clock) - fail("The driver bind address should not be allowed.") - } catch { - case e: Throwable => - assert(e.getMessage === - s"requirement failed: ${DriverServiceFeatureStep.DRIVER_BIND_ADDRESS_KEY} is" + - " not supported in Kubernetes mode, as the driver's bind address is managed" + - " and set to the driver pod's IP address.") + val sparkConf = new SparkConf(false) + .set(DRIVER_BIND_ADDRESS, "host") + .set("spark.app.name", LONG_RESOURCE_NAME_PREFIX) + val e1 = intercept[IllegalArgumentException] { + new DriverServiceFeatureStep(KubernetesTestConf.createDriverConf(sparkConf = sparkConf)) } - sparkConf.remove(org.apache.spark.internal.config.DRIVER_BIND_ADDRESS) - sparkConf.set(org.apache.spark.internal.config.DRIVER_HOST_ADDRESS, "host") - try { - new DriverServiceFeatureStep( - KubernetesConf( - sparkConf, - KubernetesDriverSpecificConf( - JavaMainAppResource(None), "main", "app", Seq.empty), - LONG_RESOURCE_NAME_PREFIX, - "app-id", - DRIVER_LABELS, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None), - clock) - fail("The driver host address should not be allowed.") - } catch { - case e: Throwable => - assert(e.getMessage === - s"requirement failed: ${DriverServiceFeatureStep.DRIVER_HOST_KEY} is" + - " not supported in Kubernetes mode, as the driver's hostname will be managed via" + - " a Kubernetes service.") + assert(e1.getMessage === + s"requirement failed: ${DriverServiceFeatureStep.DRIVER_BIND_ADDRESS_KEY} is" + + " not supported in Kubernetes mode, as the driver's bind address is managed" + + " and set to the driver pod's IP address.") + + sparkConf.remove(DRIVER_BIND_ADDRESS) + sparkConf.set(DRIVER_HOST_ADDRESS, "host") + + val e2 = intercept[IllegalArgumentException] { + new DriverServiceFeatureStep(KubernetesTestConf.createDriverConf(sparkConf = sparkConf)) } + assert(e2.getMessage === + s"requirement failed: ${DriverServiceFeatureStep.DRIVER_HOST_KEY} is" + + " not supported in Kubernetes mode, as the driver's hostname will be managed via" + + " a Kubernetes service.") } private def verifyService( @@ -227,7 +144,9 @@ class DriverServiceFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { service: Service): Unit = { assert(service.getMetadata.getName === expectedServiceName) assert(service.getSpec.getClusterIP === "None") - assert(service.getSpec.getSelector.asScala === DRIVER_LABELS) + DRIVER_LABELS.foreach { case (k, v) => + assert(service.getSpec.getSelector.get(k) === v) + } assert(service.getSpec.getPorts.size() === 2) val driverServicePorts = service.getSpec.getPorts.asScala assert(driverServicePorts.head.getName === DRIVER_PORT_NAME) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStepSuite.scala index 3d253079c3ce..045552611106 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStepSuite.scala @@ -16,12 +16,12 @@ */ package org.apache.spark.deploy.k8s.features -import io.fabric8.kubernetes.api.model.PodBuilder +import scala.collection.JavaConverters._ -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.k8s._ -class EnvSecretsFeatureStepSuite extends 
SparkFunSuite{ +class EnvSecretsFeatureStepSuite extends SparkFunSuite { private val KEY_REF_NAME_FOO = "foo" private val KEY_REF_NAME_BAR = "bar" private val KEY_REF_KEY_FOO = "key_foo" @@ -34,28 +34,14 @@ class EnvSecretsFeatureStepSuite extends SparkFunSuite{ val envVarsToKeys = Map( ENV_NAME_BAR -> s"${KEY_REF_NAME_BAR}:${KEY_REF_KEY_BAR}", ENV_NAME_FOO -> s"${KEY_REF_NAME_FOO}:${KEY_REF_KEY_FOO}") - val sparkConf = new SparkConf(false) - val kubernetesConf = KubernetesConf( - sparkConf, - KubernetesExecutorSpecificConf("1", Some(new PodBuilder().build())), - "resource-name-prefix", - "app-id", - Map.empty, - Map.empty, - Map.empty, - envVarsToKeys, - Map.empty, - Nil, - hadoopConfSpec = None) + val kubernetesConf = KubernetesTestConf.createDriverConf( + secretEnvNamesToKeyRefs = envVarsToKeys) val step = new EnvSecretsFeatureStep(kubernetesConf) - val driverContainerWithEnvSecrets = step.configurePod(baseDriverPod).container - - val expectedVars = - Seq(s"${ENV_NAME_BAR}", s"${ENV_NAME_FOO}") - - expectedVars.foreach { envName => - assert(KubernetesFeaturesTestUtils.containerHasEnvVar(driverContainerWithEnvSecrets, envName)) + val container = step.configurePod(baseDriverPod).container + val containerEnvKeys = container.getEnv.asScala.map { v => v.getName }.toSet + envVarsToKeys.keys.foreach { envName => + assert(containerEnvKeys.contains(envName)) } } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/LocalDirsFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/LocalDirsFeatureStepSuite.scala index 894d824999aa..8f34ce5c6b94 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/LocalDirsFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/LocalDirsFeatureStepSuite.scala @@ -17,45 +17,19 @@ package org.apache.spark.deploy.k8s.features import io.fabric8.kubernetes.api.model.{EnvVarBuilder, VolumeBuilder, VolumeMountBuilder} -import org.mockito.Mockito -import org.scalatest._ -import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, KubernetesRoleSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.submit.JavaMainAppResource +import org.apache.spark.util.SparkConfWithEnv -class LocalDirsFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { +class LocalDirsFeatureStepSuite extends SparkFunSuite { private val defaultLocalDir = "/var/data/default-local-dir" - private var sparkConf: SparkConf = _ - private var kubernetesConf: KubernetesConf[_ <: KubernetesRoleSpecificConf] = _ - - before { - val realSparkConf = new SparkConf(false) - sparkConf = Mockito.spy(realSparkConf) - kubernetesConf = KubernetesConf( - sparkConf, - KubernetesDriverSpecificConf( - JavaMainAppResource(None), - "app-name", - "main", - Seq.empty), - "resource", - "app-id", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None) - } test("Resolve to default local dir if neither env nor configuration are set") { - Mockito.doReturn(null).when(sparkConf).get("spark.local.dir") - Mockito.doReturn(null).when(sparkConf).getenv("SPARK_LOCAL_DIRS") - val stepUnderTest = new LocalDirsFeatureStep(kubernetesConf, defaultLocalDir) + val stepUnderTest 
= new LocalDirsFeatureStep(KubernetesTestConf.createDriverConf(), + defaultLocalDir) val configuredPod = stepUnderTest.configurePod(SparkPod.initialPod()) assert(configuredPod.pod.getSpec.getVolumes.size === 1) assert(configuredPod.pod.getSpec.getVolumes.get(0) === @@ -79,8 +53,9 @@ class LocalDirsFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { } test("Use configured local dirs split on comma if provided.") { - Mockito.doReturn("/var/data/my-local-dir-1,/var/data/my-local-dir-2") - .when(sparkConf).getenv("SPARK_LOCAL_DIRS") + val sparkConf = new SparkConfWithEnv(Map( + "SPARK_LOCAL_DIRS" -> "/var/data/my-local-dir-1,/var/data/my-local-dir-2")) + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) val stepUnderTest = new LocalDirsFeatureStep(kubernetesConf, defaultLocalDir) val configuredPod = stepUnderTest.configurePod(SparkPod.initialPod()) assert(configuredPod.pod.getSpec.getVolumes.size === 2) @@ -116,9 +91,8 @@ class LocalDirsFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { } test("Use tmpfs to back default local dir") { - Mockito.doReturn(null).when(sparkConf).get("spark.local.dir") - Mockito.doReturn(null).when(sparkConf).getenv("SPARK_LOCAL_DIRS") - Mockito.doReturn(true).when(sparkConf).get(KUBERNETES_LOCAL_DIRS_TMPFS) + val sparkConf = new SparkConf(false).set(KUBERNETES_LOCAL_DIRS_TMPFS, true) + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) val stepUnderTest = new LocalDirsFeatureStep(kubernetesConf, defaultLocalDir) val configuredPod = stepUnderTest.configurePod(SparkPod.initialPod()) assert(configuredPod.pod.getSpec.getVolumes.size === 1) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStepSuite.scala index 1555f6a9c652..22f6d26c4d0d 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStepSuite.scala @@ -16,10 +16,8 @@ */ package org.apache.spark.deploy.k8s.features -import io.fabric8.kubernetes.api.model.PodBuilder - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorSpecificConf, SecretVolumeUtils, SparkPod} +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.k8s.{KubernetesTestConf, SecretVolumeUtils, SparkPod} class MountSecretsFeatureStepSuite extends SparkFunSuite { @@ -32,19 +30,8 @@ class MountSecretsFeatureStepSuite extends SparkFunSuite { val secretNamesToMountPaths = Map( SECRET_FOO -> SECRET_MOUNT_PATH, SECRET_BAR -> SECRET_MOUNT_PATH) - val sparkConf = new SparkConf(false) - val kubernetesConf = KubernetesConf( - sparkConf, - KubernetesExecutorSpecificConf("1", Some(new PodBuilder().build())), - "resource-name-prefix", - "app-id", - Map.empty, - Map.empty, - secretNamesToMountPaths, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None) + val kubernetesConf = KubernetesTestConf.createExecutorConf( + secretNamesToMountPaths = secretNamesToMountPaths) val step = new MountSecretsFeatureStep(kubernetesConf) val driverPodWithSecretsMounted = step.configurePod(baseDriverPod).pod diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index aadbf16897f4..e6f1dd640e3e 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -16,29 +16,12 @@ */ package org.apache.spark.deploy.k8s.features +import scala.collection.JavaConverters._ + import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource class MountVolumesFeatureStepSuite extends SparkFunSuite { - private val sparkConf = new SparkConf(false) - private val emptyKubernetesConf = KubernetesConf( - sparkConf = sparkConf, - roleSpecificConf = KubernetesDriverSpecificConf( - JavaMainAppResource(None), - "app-name", - "main", - Seq.empty), - appResourceNamePrefix = "resource", - appId = "app-id", - roleLabels = Map.empty, - roleAnnotations = Map.empty, - roleSecretNamesToMountPaths = Map.empty, - roleSecretEnvNamesToKeyRefs = Map.empty, - roleEnvs = Map.empty, - roleVolumes = Nil, - hadoopConfSpec = None) - test("Mounts hostPath volumes") { val volumeConf = KubernetesVolumeSpec( "testVolume", @@ -47,7 +30,7 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { false, KubernetesHostPathVolumeConf("/hostPath/tmp") ) - val kubernetesConf = emptyKubernetesConf.copy(roleVolumes = volumeConf :: Nil) + val kubernetesConf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeConf)) val step = new MountVolumesFeatureStep(kubernetesConf) val configuredPod = step.configurePod(SparkPod.initialPod()) @@ -67,7 +50,7 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { true, KubernetesPVCVolumeConf("pvcClaim") ) - val kubernetesConf = emptyKubernetesConf.copy(roleVolumes = volumeConf :: Nil) + val kubernetesConf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeConf)) val step = new MountVolumesFeatureStep(kubernetesConf) val configuredPod = step.configurePod(SparkPod.initialPod()) @@ -89,7 +72,7 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { false, KubernetesEmptyDirVolumeConf(Some("Memory"), Some("6G")) ) - val kubernetesConf = emptyKubernetesConf.copy(roleVolumes = volumeConf :: Nil) + val kubernetesConf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeConf)) val step = new MountVolumesFeatureStep(kubernetesConf) val configuredPod = step.configurePod(SparkPod.initialPod()) @@ -111,7 +94,7 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { false, KubernetesEmptyDirVolumeConf(None, None) ) - val kubernetesConf = emptyKubernetesConf.copy(roleVolumes = volumeConf :: Nil) + val kubernetesConf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeConf)) val step = new MountVolumesFeatureStep(kubernetesConf) val configuredPod = step.configurePod(SparkPod.initialPod()) @@ -140,8 +123,8 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { true, KubernetesPVCVolumeConf("pvcClaim") ) - val volumesConf = hpVolumeConf :: pvcVolumeConf :: Nil - val kubernetesConf = emptyKubernetesConf.copy(roleVolumes = volumesConf) + val kubernetesConf = KubernetesTestConf.createDriverConf( + volumes = Seq(hpVolumeConf, pvcVolumeConf)) val step = new MountVolumesFeatureStep(kubernetesConf) val configuredPod = step.configurePod(SparkPod.initialPod()) @@ -157,7 +140,7 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { 
false, KubernetesEmptyDirVolumeConf(None, None) ) - val kubernetesConf = emptyKubernetesConf.copy(roleVolumes = volumeConf :: Nil) + val kubernetesConf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeConf)) val step = new MountVolumesFeatureStep(kubernetesConf) val configuredPod = step.configurePod(SparkPod.initialPod()) @@ -176,7 +159,7 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { true, KubernetesPVCVolumeConf("pvcClaim") ) - val kubernetesConf = emptyKubernetesConf.copy(roleVolumes = volumeConf :: Nil) + val kubernetesConf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeConf)) val step = new MountVolumesFeatureStep(kubernetesConf) val configuredPod = step.configurePod(SparkPod.initialPod()) @@ -206,19 +189,18 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { true, KubernetesEmptyDirVolumeConf(None, None) ) - val kubernetesConf = emptyKubernetesConf.copy( - roleVolumes = emptyDirSpec :: pvcSpec :: Nil) + val kubernetesConf = KubernetesTestConf.createDriverConf(volumes = Seq(emptyDirSpec, pvcSpec)) val step = new MountVolumesFeatureStep(kubernetesConf) val configuredPod = step.configurePod(SparkPod.initialPod()) assert(configuredPod.pod.getSpec.getVolumes.size() === 2) - val mounts = configuredPod.container.getVolumeMounts - assert(mounts.size() === 2) - assert(mounts.get(0).getName === "testEmptyDir") - assert(mounts.get(0).getMountPath === "/tmp/foo") - assert(mounts.get(0).getSubPath === "foo") - assert(mounts.get(1).getName === "testPVC") - assert(mounts.get(1).getMountPath === "/tmp/bar") - assert(mounts.get(1).getSubPath === "bar") + val mounts = configuredPod.container.getVolumeMounts.asScala.sortBy(_.getName()) + assert(mounts.size === 2) + assert(mounts(0).getName === "testEmptyDir") + assert(mounts(0).getMountPath === "/tmp/foo") + assert(mounts(0).getSubPath === "foo") + assert(mounts(1).getName === "testPVC") + assert(mounts(1).getMountPath === "/tmp/bar") + assert(mounts(1).getSubPath === "bar") } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala index 370948c9502e..7295b82ca479 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala @@ -20,40 +20,22 @@ import java.io.{File, PrintWriter} import java.nio.file.Files import io.fabric8.kubernetes.api.model.ConfigMap -import org.mockito.Mockito import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource class PodTemplateConfigMapStepSuite extends SparkFunSuite with BeforeAndAfter { - private var sparkConf: SparkConf = _ - private var kubernetesConf : KubernetesConf[_ <: KubernetesRoleSpecificConf] = _ + private var kubernetesConf : KubernetesConf = _ private var templateFile: File = _ before { - sparkConf = Mockito.mock(classOf[SparkConf]) - kubernetesConf = KubernetesConf( - sparkConf, - KubernetesDriverSpecificConf( - JavaMainAppResource(None), - "app-name", - "main", - Seq.empty), - "resource", - "app-id", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - Option.empty) templateFile = Files.createTempFile("pod-template", 
"yml").toFile templateFile.deleteOnExit() - Mockito.doReturn(Option(templateFile.getAbsolutePath)).when(sparkConf) - .get(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE) + + val sparkConf = new SparkConf(false) + .set(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE, templateFile.getAbsolutePath) + kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) } test("Mounts executor template volume if config specified") { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala index 08f28758ef48..e9c05fef6f5d 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala @@ -24,8 +24,8 @@ import org.mockito.Mockito.{doReturn, verify, when} import org.scalatest.BeforeAndAfter import org.scalatest.mockito.MockitoSugar._ -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpec, KubernetesDriverSpecificConf, SparkPod} +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.Fabric8Aliases._ @@ -37,10 +37,6 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { private val KUBERNETES_RESOURCE_PREFIX = "resource-example" private val POD_NAME = "driver" private val CONTAINER_NAME = "container" - private val APP_ID = "app-id" - private val APP_NAME = "app" - private val MAIN_CLASS = "main" - private val APP_ARGS = Seq("arg1", "arg2") private val RESOLVED_JAVA_OPTIONS = Map( "conf1key" -> "conf1value", "conf2key" -> "conf2value") @@ -122,28 +118,15 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { @Mock private var resourceList: RESOURCE_LIST = _ - private var kubernetesConf: KubernetesConf[KubernetesDriverSpecificConf] = _ - - private var sparkConf: SparkConf = _ + private var kconf: KubernetesDriverConf = _ private var createdPodArgumentCaptor: ArgumentCaptor[Pod] = _ private var createdResourcesArgumentCaptor: ArgumentCaptor[HasMetadata] = _ before { MockitoAnnotations.initMocks(this) - sparkConf = new SparkConf(false) - kubernetesConf = KubernetesConf[KubernetesDriverSpecificConf]( - sparkConf, - KubernetesDriverSpecificConf(JavaMainAppResource(None), MAIN_CLASS, APP_NAME, APP_ARGS), - KUBERNETES_RESOURCE_PREFIX, - APP_ID, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None) - when(driverBuilder.buildFromFeatures(kubernetesConf)).thenReturn(BUILT_KUBERNETES_SPEC) + kconf = KubernetesTestConf.createDriverConf( + resourceNamePrefix = Some(KUBERNETES_RESOURCE_PREFIX)) + when(driverBuilder.buildFromFeatures(kconf)).thenReturn(BUILT_KUBERNETES_SPEC) when(kubernetesClient.pods()).thenReturn(podOperations) when(podOperations.withName(POD_NAME)).thenReturn(namedPods) @@ -158,26 +141,22 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { test("The client should configure the pod using the builder.") { val submissionClient = new Client( + kconf, driverBuilder, - kubernetesConf, kubernetesClient, false, - "spark", - loggingPodStatusWatcher, - KUBERNETES_RESOURCE_PREFIX) + loggingPodStatusWatcher) submissionClient.run() verify(podOperations).create(FULL_EXPECTED_POD) } test("The client should create Kubernetes resources") { val 
submissionClient = new Client( + kconf, driverBuilder, - kubernetesConf, kubernetesClient, false, - "spark", - loggingPodStatusWatcher, - KUBERNETES_RESOURCE_PREFIX) + loggingPodStatusWatcher) submissionClient.run() val otherCreatedResources = createdResourcesArgumentCaptor.getAllValues assert(otherCreatedResources.size === 2) @@ -197,13 +176,11 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { test("Waiting for app completion should stall on the watcher") { val submissionClient = new Client( + kconf, driverBuilder, - kubernetesConf, kubernetesClient, true, - "spark", - loggingPodStatusWatcher, - KUBERNETES_RESOURCE_PREFIX) + loggingPodStatusWatcher) submissionClient.run() verify(loggingPodStatusWatcher).awaitCompletion() } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala index 3708864592d7..7e7dc4763c2e 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala @@ -83,48 +83,21 @@ class KubernetesDriverBuilderSuite extends SparkFunSuite { _ => templateVolumeStep) test("Apply fundamental steps all the time.") { - val conf = KubernetesConf( - new SparkConf(false), - KubernetesDriverSpecificConf( - JavaMainAppResource(Some("example.jar")), - "test-app", - "main", - Seq.empty), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = None) + val conf = KubernetesTestConf.createDriverConf() validateStepTypesApplied( builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, CREDENTIALS_STEP_TYPE, SERVICE_STEP_TYPE, LOCAL_DIRS_STEP_TYPE, - DRIVER_CMD_STEP_TYPE) + DRIVER_CMD_STEP_TYPE, + HADOOP_GLOBAL_STEP_TYPE) } test("Apply secrets step if secrets are present.") { - val conf = KubernetesConf( - new SparkConf(false), - KubernetesDriverSpecificConf( - JavaMainAppResource(None), - "test-app", - "main", - Seq.empty), - "prefix", - "appId", - Map.empty, - Map.empty, - Map("secret" -> "secretMountPath"), - Map("EnvName" -> "SecretName:secretKey"), - Map.empty, - Nil, - hadoopConfSpec = None) + val conf = KubernetesTestConf.createDriverConf( + secretEnvNamesToKeyRefs = Map("EnvName" -> "SecretName:secretKey"), + secretNamesToMountPaths = Map("secret" -> "secretMountPath")) validateStepTypesApplied( builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, @@ -133,7 +106,8 @@ class KubernetesDriverBuilderSuite extends SparkFunSuite { LOCAL_DIRS_STEP_TYPE, SECRETS_STEP_TYPE, ENV_SECRETS_STEP_TYPE, - DRIVER_CMD_STEP_TYPE) + DRIVER_CMD_STEP_TYPE, + HADOOP_GLOBAL_STEP_TYPE) } test("Apply volumes step if mounts are present.") { @@ -143,22 +117,7 @@ class KubernetesDriverBuilderSuite extends SparkFunSuite { "", false, KubernetesHostPathVolumeConf("/path")) - val conf = KubernetesConf( - new SparkConf(false), - KubernetesDriverSpecificConf( - JavaMainAppResource(None), - "test-app", - "main", - Seq.empty), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - volumeSpec :: Nil, - hadoopConfSpec = None) + val conf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeSpec)) validateStepTypesApplied( builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, @@ -166,7 +125,8 @@ class 
KubernetesDriverBuilderSuite extends SparkFunSuite { SERVICE_STEP_TYPE, LOCAL_DIRS_STEP_TYPE, MOUNT_VOLUMES_STEP_TYPE, - DRIVER_CMD_STEP_TYPE) + DRIVER_CMD_STEP_TYPE, + HADOOP_GLOBAL_STEP_TYPE) } test("Apply volumes step if a mount subpath is present.") { @@ -176,22 +136,7 @@ class KubernetesDriverBuilderSuite extends SparkFunSuite { "foo", false, KubernetesHostPathVolumeConf("/path")) - val conf = KubernetesConf( - new SparkConf(false), - KubernetesDriverSpecificConf( - JavaMainAppResource(None), - "test-app", - "main", - Seq.empty), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - volumeSpec :: Nil, - hadoopConfSpec = None) + val conf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeSpec)) validateStepTypesApplied( builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, @@ -199,89 +144,14 @@ class KubernetesDriverBuilderSuite extends SparkFunSuite { SERVICE_STEP_TYPE, LOCAL_DIRS_STEP_TYPE, MOUNT_VOLUMES_STEP_TYPE, - DRIVER_CMD_STEP_TYPE) - } - - test("Apply template volume step if executor template is present.") { - val sparkConf = spy(new SparkConf(false)) - doReturn(Option("filename")).when(sparkConf) - .get(KUBERNETES_EXECUTOR_PODTEMPLATE_FILE) - val conf = KubernetesConf( - sparkConf, - KubernetesDriverSpecificConf( - JavaMainAppResource(Some("example.jar")), - "test-app", - "main", - Seq.empty), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - Option.empty) - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), - BASIC_STEP_TYPE, - CREDENTIALS_STEP_TYPE, - SERVICE_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, - DRIVER_CMD_STEP_TYPE, - TEMPLATE_VOLUME_STEP_TYPE) - } - - test("Apply HadoopSteps if HADOOP_CONF_DIR is defined.") { - val conf = KubernetesConf( - new SparkConf(false), - KubernetesDriverSpecificConf( - JavaMainAppResource(None), - "test-app", - "main", - Seq.empty), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = Some( - HadoopConfSpec( - Some("/var/hadoop-conf"), - None))) - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), - BASIC_STEP_TYPE, - CREDENTIALS_STEP_TYPE, - SERVICE_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, DRIVER_CMD_STEP_TYPE, HADOOP_GLOBAL_STEP_TYPE) } - test("Apply HadoopSteps if HADOOP_CONF ConfigMap is defined.") { - val conf = KubernetesConf( - new SparkConf(false), - KubernetesDriverSpecificConf( - JavaMainAppResource(None), - "test-app", - "main", - Seq.empty), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - hadoopConfSpec = Some( - HadoopConfSpec( - None, - Some("pre-defined-configMapName")))) + test("Apply template volume step if executor template is present.") { + val sparkConf = new SparkConf(false) + .set(KUBERNETES_EXECUTOR_PODTEMPLATE_FILE, "filename") + val conf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) validateStepTypesApplied( builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, @@ -289,12 +159,16 @@ class KubernetesDriverBuilderSuite extends SparkFunSuite { SERVICE_STEP_TYPE, LOCAL_DIRS_STEP_TYPE, DRIVER_CMD_STEP_TYPE, - HADOOP_GLOBAL_STEP_TYPE) + HADOOP_GLOBAL_STEP_TYPE, + TEMPLATE_VOLUME_STEP_TYPE) } private def validateStepTypesApplied(resolvedSpec: KubernetesDriverSpec, stepTypes: String*) : Unit = { - assert(resolvedSpec.systemProperties.size === stepTypes.size) + val addedProperties = resolvedSpec.systemProperties + .filter { case (k, _) => !k.startsWith("spark.") } + .toMap + 
assert(addedProperties.keys.toSet === stepTypes.toSet) stepTypes.foreach { stepType => assert(resolvedSpec.pod.pod.getMetadata.getLabels.get(stepType) === stepType) assert(resolvedSpec.driverKubernetesResources.containsSlice( @@ -314,22 +188,7 @@ class KubernetesDriverBuilderSuite extends SparkFunSuite { val sparkConf = new SparkConf(false) .set(CONTAINER_IMAGE, "spark-driver:latest") .set(KUBERNETES_DRIVER_PODTEMPLATE_FILE, "template-file.yaml") - val kubernetesConf = new KubernetesConf( - sparkConf, - KubernetesDriverSpecificConf( - JavaMainAppResource(Some("example.jar")), - "test-app", - "main", - Seq.empty), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - Option.empty) + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) val driverSpec = KubernetesDriverBuilder .apply(kubernetesClient, sparkConf) .buildFromFeatures(kubernetesConf) @@ -346,22 +205,7 @@ class KubernetesDriverBuilderSuite extends SparkFunSuite { val sparkConf = new SparkConf(false) .set(CONTAINER_IMAGE, "spark-driver:latest") .set(KUBERNETES_DRIVER_PODTEMPLATE_FILE, "template-file.yaml") - val kubernetesConf = new KubernetesConf( - sparkConf, - KubernetesDriverSpecificConf( - JavaMainAppResource(Some("example.jar")), - "test-app", - "main", - Seq.empty), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - Option.empty) + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) val exception = intercept[SparkException] { KubernetesDriverBuilder .apply(kubernetesClient, sparkConf) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index 2f984e5d8980..ddf9f67a0727 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -27,7 +27,7 @@ import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorSpecificConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.Fabric8Aliases._ @@ -79,7 +79,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { when(kubernetesClient.pods()).thenReturn(podOperations) when(podOperations.withName(driverPodName)).thenReturn(driverPodOperations) when(driverPodOperations.get).thenReturn(driverPod) - when(executorBuilder.buildFromFeatures(kubernetesConfWithCorrectFields())) + when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]))) .thenAnswer(executorPodAnswer()) snapshotsStore = new DeterministicExecutorPodsSnapshotsStore() waitForExecutorPodsClock = new ManualClock(0L) @@ -147,44 +147,9 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { private def executorPodAnswer(): Answer[SparkPod] = { new Answer[SparkPod] { override def answer(invocation: InvocationOnMock): SparkPod = { - val k8sConf = invocation.getArgumentAt( - 0, 
classOf[KubernetesConf[KubernetesExecutorSpecificConf]]) - executorPodWithId(k8sConf.roleSpecificConf.executorId.toInt) + val k8sConf = invocation.getArgumentAt(0, classOf[KubernetesExecutorConf]) + executorPodWithId(k8sConf.executorId.toInt) } } } - - private def kubernetesConfWithCorrectFields(): KubernetesConf[KubernetesExecutorSpecificConf] = - Matchers.argThat(new ArgumentMatcher[KubernetesConf[KubernetesExecutorSpecificConf]] { - override def matches(argument: scala.Any): Boolean = { - if (!argument.isInstanceOf[KubernetesConf[_]]) { - false - } else { - val k8sConf = argument.asInstanceOf[KubernetesConf[KubernetesExecutorSpecificConf]] - val executorSpecificConf = k8sConf.roleSpecificConf - // TODO: HADOOP_CONF_DIR - val expectedK8sConf = KubernetesConf.createExecutorConf( - conf, - executorSpecificConf.executorId, - TEST_SPARK_APP_ID, - Some(driverPod)) - - // Set prefixes to a common string since KUBERNETES_EXECUTOR_POD_NAME_PREFIX - // has not be set for the tests and thus KubernetesConf will use a random - // string for the prefix, based on the app name, and this comparison here will fail. - val k8sConfCopy = k8sConf - .copy(appResourceNamePrefix = "") - .copy(sparkConf = conf) - val expectedK8sConfCopy = expectedK8sConf - .copy(appResourceNamePrefix = "") - .copy(sparkConf = conf) - - k8sConf.sparkConf.getAll.toMap == conf.getAll.toMap && - // Since KubernetesConf.createExecutorConf clones the SparkConf object, force - // deep equality comparison for the SparkConf object and use object equality - // comparison on all other fields. - k8sConfCopy == expectedK8sConfCopy - } - } - }) } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala index a59f6d072023..b6a75b15af85 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.scheduler.cluster.k8s +import scala.collection.JavaConverters._ + import io.fabric8.kubernetes.api.model.{Config => _, _} import io.fabric8.kubernetes.client.KubernetesClient import org.mockito.Mockito.{mock, never, verify} @@ -25,6 +27,7 @@ import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.features._ import org.apache.spark.deploy.k8s.submit.PodBuilderSuiteUtils +import org.apache.spark.util.SparkConfWithEnv class KubernetesExecutorBuilderSuite extends SparkFunSuite { private val BASIC_STEP_TYPE = "basic" @@ -64,37 +67,15 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { _ => hadoopSparkUser) test("Basic steps are consistently applied.") { - val conf = KubernetesConf( - new SparkConf(false), - KubernetesExecutorSpecificConf( - "executor-id", Some(new PodBuilder().build())), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - None) + val conf = KubernetesTestConf.createExecutorConf() validateStepTypesApplied( builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, LOCAL_DIRS_STEP_TYPE) } test("Apply secrets step if secrets are present.") { - val conf = KubernetesConf( - new SparkConf(false), - KubernetesExecutorSpecificConf( - "executor-id", Some(new 
PodBuilder().build())), - "prefix", - "appId", - Map.empty, - Map.empty, - Map("secret" -> "secretMountPath"), - Map("secret-name" -> "secret-key"), - Map.empty, - Nil, - None) + val conf = KubernetesTestConf.createExecutorConf( + secretEnvNamesToKeyRefs = Map("secret-name" -> "secret-key"), + secretNamesToMountPaths = Map("secret" -> "secretMountPath")) validateStepTypesApplied( builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, @@ -110,19 +91,8 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { "", false, KubernetesHostPathVolumeConf("/checkpoint")) - val conf = KubernetesConf( - new SparkConf(false), - KubernetesExecutorSpecificConf( - "executor-id", Some(new PodBuilder().build())), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - volumeSpec :: Nil, - None) + val conf = KubernetesTestConf.createExecutorConf( + volumes = Seq(volumeSpec)) validateStepTypesApplied( builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, @@ -132,25 +102,10 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { test("Apply basicHadoop step if HADOOP_CONF_DIR is defined") { // HADOOP_DELEGATION_TOKEN - val HADOOP_CREDS_PREFIX = "spark.security.credentials." - val HADOOPFS_PROVIDER = s"$HADOOP_CREDS_PREFIX.hadoopfs.enabled" - val conf = KubernetesConf( - new SparkConf(false) + val conf = KubernetesTestConf.createExecutorConf( + sparkConf = new SparkConfWithEnv(Map("HADOOP_CONF_DIR" -> "/var/hadoop-conf")) .set(HADOOP_CONFIG_MAP_NAME, "hadoop-conf-map-name") - .set(KRB5_CONFIG_MAP_NAME, "krb5-conf-map-name") - .set(KERBEROS_SPARK_USER_NAME, "spark-user") - .set(HADOOPFS_PROVIDER, "true"), - KubernetesExecutorSpecificConf( - "executor-id", Some(new PodBuilder().build())), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - Some(HadoopConfSpec(Some("/var/hadoop-conf"), None))) + .set(KRB5_CONFIG_MAP_NAME, "krb5-conf-map-name")) validateStepTypesApplied( builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, @@ -160,24 +115,13 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { } test("Apply kerberos step if DT secrets created") { - val conf = KubernetesConf( - new SparkConf(false) + val conf = KubernetesTestConf.createExecutorConf( + sparkConf = new SparkConf(false) .set(HADOOP_CONFIG_MAP_NAME, "hadoop-conf-map-name") .set(KRB5_CONFIG_MAP_NAME, "krb5-conf-map-name") .set(KERBEROS_SPARK_USER_NAME, "spark-user") .set(KERBEROS_DT_SECRET_NAME, "dt-secret") - .set(KERBEROS_DT_SECRET_KEY, "dt-key"), - KubernetesExecutorSpecificConf( - "executor-id", Some(new PodBuilder().build())), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - Some(HadoopConfSpec(None, Some("pre-defined-onfigMapName")))) + .set(KERBEROS_DT_SECRET_KEY, "dt-key" )) validateStepTypesApplied( builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, @@ -187,10 +131,7 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { } private def validateStepTypesApplied(resolvedPod: SparkPod, stepTypes: String*): Unit = { - assert(resolvedPod.pod.getMetadata.getLabels.size === stepTypes.size) - stepTypes.foreach { stepType => - assert(resolvedPod.pod.getMetadata.getLabels.get(stepType) === stepType) - } + assert(resolvedPod.pod.getMetadata.getLabels.asScala.keys.toSet === stepTypes.toSet) } test("Starts with empty executor pod if template is not specified") { @@ -205,25 +146,14 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { .set("spark.driver.host", 
"https://driver.host.com") .set(Config.CONTAINER_IMAGE, "spark-executor:latest") .set(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE, "template-file.yaml") - val kubernetesConf = KubernetesConf( - sparkConf, - KubernetesExecutorSpecificConf( - "executor-id", Some(new PodBuilder() - .withNewMetadata() + val kubernetesConf = KubernetesTestConf.createExecutorConf( + sparkConf = sparkConf, + driverPod = Some(new PodBuilder() + .withNewMetadata() .withName("driver") .endMetadata() - .build())), - "prefix", - "appId", - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Map.empty, - Nil, - Option.empty) - val sparkPod = KubernetesExecutorBuilder - .apply(kubernetesClient, sparkConf) + .build())) + val sparkPod = KubernetesExecutorBuilder(kubernetesClient, sparkConf) .buildFromFeatures(kubernetesConf) PodBuilderSuiteUtils.verifyPodWithSupportedFeatures(sparkPod) } From 28d33744076abd8bf7955eefcbdeef4849a99c40 Mon Sep 17 00:00:00 2001 From: DylanGuedes Date: Sat, 1 Dec 2018 10:37:03 +0800 Subject: [PATCH 0036/1072] [SPARK-23647][PYTHON][SQL] Adds more types for hint in pyspark Signed-off-by: DylanGuedes ## What changes were proposed in this pull request? Addition of float, int and list hints for `pyspark.sql` Hint. ## How was this patch tested? I did manual tests following the same principles used in the Scala version, and also added unit tests. Closes #20788 from DylanGuedes/jira-21030. Authored-by: DylanGuedes Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/dataframe.py | 6 ++++-- python/pyspark/sql/tests/test_dataframe.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index b8833a39078b..1b1092c409be 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -485,10 +485,12 @@ def hint(self, name, *parameters): if not isinstance(name, str): raise TypeError("name should be provided as str, got {0}".format(type(name))) + allowed_types = (basestring, list, float, int) for p in parameters: - if not isinstance(p, str): + if not isinstance(p, allowed_types): raise TypeError( - "all parameters should be str, got {0} of type {1}".format(p, type(p))) + "all parameters should be in {0}, got {1} of type {2}".format( + allowed_types, p, type(p))) jdf = self._jdf.hint(name, self._jseq(parameters)) return DataFrame(jdf, self.sql_ctx) diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index 908d400e0009..65edf593c300 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -375,6 +375,19 @@ def test_generic_hints(self): plan = df1.join(df2.hint("broadcast"), "id")._jdf.queryExecution().executedPlan() self.assertEqual(1, plan.toString().count("BroadcastHashJoin")) + # add tests for SPARK-23647 (test more types for hint) + def test_extended_hint_types(self): + from pyspark.sql import DataFrame + + df = self.spark.range(10e10).toDF("id") + such_a_nice_list = ["itworks1", "itworks2", "itworks3"] + hinted_df = df.hint("my awesome hint", 1.2345, "what", such_a_nice_list) + logical_plan = hinted_df._jdf.queryExecution().logical() + + self.assertEqual(1, logical_plan.toString().count("1.2345")) + self.assertEqual(1, logical_plan.toString().count("what")) + self.assertEqual(3, logical_plan.toString().count("itworks")) + def test_sample(self): self.assertRaisesRegexp( TypeError, From 2f6e88fecb455a02c4c08c41290e2f338e979543 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 30 Nov 2018 
23:14:05 -0800 Subject: [PATCH 0037/1072] [SPARK-26189][R] Fix unionAll doc in SparkR ## What changes were proposed in this pull request? Fix unionAll doc in SparkR ## How was this patch tested? Manually ran tests Author: Huaxin Gao Closes #23161 from huaxingao/spark-26189. --- R/pkg/R/DataFrame.R | 20 ++++++++++++++++---- R/pkg/R/generics.R | 2 +- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 745bb3e15932..24ed449f2a7d 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2730,13 +2730,25 @@ setMethod("union", dataFrame(unioned) }) -#' Return a new SparkDataFrame containing the union of rows +#' Return a new SparkDataFrame containing the union of rows. #' -#' This is an alias for `union`. +#' This is an alias for \code{union}. #' -#' @rdname union -#' @name unionAll +#' @param x a SparkDataFrame. +#' @param y a SparkDataFrame. +#' @return A SparkDataFrame containing the result of the unionAll operation. +#' @family SparkDataFrame functions #' @aliases unionAll,SparkDataFrame,SparkDataFrame-method +#' @rdname unionAll +#' @name unionAll +#' @seealso \link{union} +#' @examples +#'\dontrun{ +#' sparkR.session() +#' df1 <- read.json(path) +#' df2 <- read.json(path2) +#' unionAllDF <- unionAll(df1, df2) +#' } #' @note unionAll since 1.4.0 setMethod("unionAll", signature(x = "SparkDataFrame", y = "SparkDataFrame"), diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 9d8c24c686c7..eed76465221c 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -631,7 +631,7 @@ setGeneric("toRDD", function(x) { standardGeneric("toRDD") }) #' @rdname union setGeneric("union", function(x, y) { standardGeneric("union") }) -#' @rdname union +#' @rdname unionAll setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") }) #' @rdname unionByName From 327ac83f5cf33c84775a95442862bea56d8a0005 Mon Sep 17 00:00:00 2001 From: caoxuewen Date: Sat, 1 Dec 2018 16:34:11 +0800 Subject: [PATCH 0038/1072] [SPARK-26180][CORE][TEST] Reuse withTempDir function to the SparkCore test case ## What changes were proposed in this pull request? Currently, the common `withTempDir` function is used in Spark SQL test cases to handle the `val dir = Utils.createTempDir()` / `Utils.deleteRecursively(dir)` pattern. Unfortunately, that function cannot be used in the Spark Core test cases. This PR shares the `withTempDir` function between Spark SQL and Spark Core and uses it to clean up the Spark Core test cases; a sketch of the shared helper is shown below. ## How was this patch tested? N/A Closes #23151 from heary-cao/withCreateTempDir. 
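For reference, a minimal sketch of what such a shared `withTempDir` helper might look like, assuming it simply wraps `Utils.createTempDir()` and `Utils.deleteRecursively` as described above; the trait name below is only for illustration (the patch itself appears to move the helper into `SparkFunSuite`, per the diffstat that follows):

```scala
import java.io.File

import org.apache.spark.util.Utils

// Illustrative sketch only: run `f` against a freshly created temporary
// directory and always delete it afterwards, even if the test body throws.
trait TempDirHelper {
  protected def withTempDir(f: File => Unit): Unit = {
    val dir = Utils.createTempDir()
    try {
      f(dir)
    } finally {
      Utils.deleteRecursively(dir)
    }
  }
}
```

Test suites can then write `withTempDir { dir => ... }` instead of pairing `Utils.createTempDir()` with `Utils.deleteRecursively(dir)` by hand, which is the pattern the rewritten suites in the diff below follow.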
Authored-by: caoxuewen Signed-off-by: Hyukjin Kwon --- .../org/apache/spark/CheckpointSuite.scala | 5 +- .../apache/spark/ContextCleanerSuite.scala | 97 ++-- .../scala/org/apache/spark/FileSuite.scala | 19 +- .../org/apache/spark/SparkContextSuite.scala | 315 +++++----- .../org/apache/spark/SparkFunSuite.scala | 12 +- .../api/python/PythonBroadcastSuite.scala | 5 +- .../spark/deploy/SparkSubmitSuite.scala | 539 +++++++++--------- .../history/FsHistoryProviderSuite.scala | 89 +-- .../history/HistoryServerArgumentsSuite.scala | 8 +- .../master/PersistenceEngineSuite.scala | 5 +- .../input/WholeTextFileInputFormatSuite.scala | 6 +- .../WholeTextFileRecordReaderSuite.scala | 62 +- .../spark/rdd/PairRDDFunctionsSuite.scala | 5 +- .../org/apache/spark/rpc/RpcEnvSuite.scala | 113 ++-- .../spark/scheduler/DAGSchedulerSuite.scala | 22 +- ...putCommitCoordinatorIntegrationSuite.scala | 5 +- .../serializer/KryoSerializerSuite.scala | 42 +- .../apache/spark/storage/DiskStoreSuite.scala | 1 - .../util/PeriodicRDDCheckpointerSuite.scala | 52 +- .../org/apache/spark/util/UtilsSuite.scala | 222 ++++---- .../apache/spark/sql/test/SQLTestUtils.scala | 100 ++-- .../spark/sql/hive/client/VersionsSuite.scala | 9 - .../spark/streaming/TestSuiteBase.scala | 12 - 23 files changed, 858 insertions(+), 887 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index 48408ccc8f81..6d9e47cfd00f 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -586,8 +586,7 @@ object CheckpointSuite { class CheckpointCompressionSuite extends SparkFunSuite with LocalSparkContext { test("checkpoint compression") { - val checkpointDir = Utils.createTempDir() - try { + withTempDir { checkpointDir => val conf = new SparkConf() .set("spark.checkpoint.compress", "true") .set("spark.ui.enabled", "false") @@ -616,8 +615,6 @@ class CheckpointCompressionSuite extends SparkFunSuite with LocalSparkContext { // Verify that the compressed content can be read back assert(rdd.collect().toSeq === (1 to 20)) - } finally { - Utils.deleteRecursively(checkpointDir) } } } diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala index 6724af952505..1fcc975ab39a 100644 --- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala @@ -207,54 +207,55 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { } test("automatically cleanup normal checkpoint") { - val checkpointDir = Utils.createTempDir() - checkpointDir.delete() - var rdd = newPairRDD() - sc.setCheckpointDir(checkpointDir.toString) - rdd.checkpoint() - rdd.cache() - rdd.collect() - var rddId = rdd.id - - // Confirm the checkpoint directory exists - assert(ReliableRDDCheckpointData.checkpointPath(sc, rddId).isDefined) - val path = ReliableRDDCheckpointData.checkpointPath(sc, rddId).get - val fs = path.getFileSystem(sc.hadoopConfiguration) - assert(fs.exists(path)) - - // the checkpoint is not cleaned by default (without the configuration set) - var postGCTester = new CleanerTester(sc, Seq(rddId), Nil, Nil, Seq(rddId)) - rdd = null // Make RDD out of scope, ok if collected earlier - runGC() - postGCTester.assertCleanup() - assert(!fs.exists(ReliableRDDCheckpointData.checkpointPath(sc, rddId).get)) - - // Verify that checkpoints are NOT cleaned up if the config 
is not enabled - sc.stop() - val conf = new SparkConf() - .setMaster("local[2]") - .setAppName("cleanupCheckpoint") - .set("spark.cleaner.referenceTracking.cleanCheckpoints", "false") - sc = new SparkContext(conf) - rdd = newPairRDD() - sc.setCheckpointDir(checkpointDir.toString) - rdd.checkpoint() - rdd.cache() - rdd.collect() - rddId = rdd.id - - // Confirm the checkpoint directory exists - assert(fs.exists(ReliableRDDCheckpointData.checkpointPath(sc, rddId).get)) - - // Reference rdd to defeat any early collection by the JVM - rdd.count() - - // Test that GC causes checkpoint data cleanup after dereferencing the RDD - postGCTester = new CleanerTester(sc, Seq(rddId)) - rdd = null // Make RDD out of scope - runGC() - postGCTester.assertCleanup() - assert(fs.exists(ReliableRDDCheckpointData.checkpointPath(sc, rddId).get)) + withTempDir { checkpointDir => + checkpointDir.delete() + var rdd = newPairRDD() + sc.setCheckpointDir(checkpointDir.toString) + rdd.checkpoint() + rdd.cache() + rdd.collect() + var rddId = rdd.id + + // Confirm the checkpoint directory exists + assert(ReliableRDDCheckpointData.checkpointPath(sc, rddId).isDefined) + val path = ReliableRDDCheckpointData.checkpointPath(sc, rddId).get + val fs = path.getFileSystem(sc.hadoopConfiguration) + assert(fs.exists(path)) + + // the checkpoint is not cleaned by default (without the configuration set) + var postGCTester = new CleanerTester(sc, Seq(rddId), Nil, Nil, Seq(rddId)) + rdd = null // Make RDD out of scope, ok if collected earlier + runGC() + postGCTester.assertCleanup() + assert(!fs.exists(ReliableRDDCheckpointData.checkpointPath(sc, rddId).get)) + + // Verify that checkpoints are NOT cleaned up if the config is not enabled + sc.stop() + val conf = new SparkConf() + .setMaster("local[2]") + .setAppName("cleanupCheckpoint") + .set("spark.cleaner.referenceTracking.cleanCheckpoints", "false") + sc = new SparkContext(conf) + rdd = newPairRDD() + sc.setCheckpointDir(checkpointDir.toString) + rdd.checkpoint() + rdd.cache() + rdd.collect() + rddId = rdd.id + + // Confirm the checkpoint directory exists + assert(fs.exists(ReliableRDDCheckpointData.checkpointPath(sc, rddId).get)) + + // Reference rdd to defeat any early collection by the JVM + rdd.count() + + // Test that GC causes checkpoint data cleanup after dereferencing the RDD + postGCTester = new CleanerTester(sc, Seq(rddId)) + rdd = null // Make RDD out of scope + runGC() + postGCTester.assertCleanup() + assert(fs.exists(ReliableRDDCheckpointData.checkpointPath(sc, rddId).get)) + } } test("automatically clean up local checkpoint") { diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index df04a5ea1d99..983a7917e8aa 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -306,17 +306,18 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { .set("spark.files.openCostInBytes", "0") .set("spark.default.parallelism", "1")) - val tempDir = Utils.createTempDir() - val tempDirPath = tempDir.getAbsolutePath + withTempDir { tempDir => + val tempDirPath = tempDir.getAbsolutePath - for (i <- 0 until 8) { - val tempFile = new File(tempDir, s"part-0000$i") - Files.write("someline1 in file1\nsomeline2 in file1\nsomeline3 in file1", tempFile, - StandardCharsets.UTF_8) - } + for (i <- 0 until 8) { + val tempFile = new File(tempDir, s"part-0000$i") + Files.write("someline1 in file1\nsomeline2 in file1\nsomeline3 in file1", tempFile, + 
StandardCharsets.UTF_8) + } - for (p <- Seq(1, 2, 8)) { - assert(sc.binaryFiles(tempDirPath, minPartitions = p).getNumPartitions === p) + for (p <- Seq(1, 2, 8)) { + assert(sc.binaryFiles(tempDirPath, minPartitions = p).getNumPartitions === p) + } } } diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index 79192f3f3c92..ec4c7efb5835 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -116,56 +116,57 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } test("basic case for addFile and listFiles") { - val dir = Utils.createTempDir() - - val file1 = File.createTempFile("someprefix1", "somesuffix1", dir) - val absolutePath1 = file1.getAbsolutePath - - val file2 = File.createTempFile("someprefix2", "somesuffix2", dir) - val relativePath = file2.getParent + "/../" + file2.getParentFile.getName + "/" + file2.getName - val absolutePath2 = file2.getAbsolutePath - - try { - Files.write("somewords1", file1, StandardCharsets.UTF_8) - Files.write("somewords2", file2, StandardCharsets.UTF_8) - val length1 = file1.length() - val length2 = file2.length() - - sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) - sc.addFile(file1.getAbsolutePath) - sc.addFile(relativePath) - sc.parallelize(Array(1), 1).map(x => { - val gotten1 = new File(SparkFiles.get(file1.getName)) - val gotten2 = new File(SparkFiles.get(file2.getName)) - if (!gotten1.exists()) { - throw new SparkException("file doesn't exist : " + absolutePath1) - } - if (!gotten2.exists()) { - throw new SparkException("file doesn't exist : " + absolutePath2) - } + withTempDir { dir => + val file1 = File.createTempFile("someprefix1", "somesuffix1", dir) + val absolutePath1 = file1.getAbsolutePath + + val file2 = File.createTempFile("someprefix2", "somesuffix2", dir) + val relativePath = file2.getParent + "/../" + file2.getParentFile.getName + + "/" + file2.getName + val absolutePath2 = file2.getAbsolutePath + + try { + Files.write("somewords1", file1, StandardCharsets.UTF_8) + Files.write("somewords2", file2, StandardCharsets.UTF_8) + val length1 = file1.length() + val length2 = file2.length() + + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) + sc.addFile(file1.getAbsolutePath) + sc.addFile(relativePath) + sc.parallelize(Array(1), 1).map(x => { + val gotten1 = new File(SparkFiles.get(file1.getName)) + val gotten2 = new File(SparkFiles.get(file2.getName)) + if (!gotten1.exists()) { + throw new SparkException("file doesn't exist : " + absolutePath1) + } + if (!gotten2.exists()) { + throw new SparkException("file doesn't exist : " + absolutePath2) + } - if (length1 != gotten1.length()) { - throw new SparkException( - s"file has different length $length1 than added file ${gotten1.length()} : " + - absolutePath1) - } - if (length2 != gotten2.length()) { - throw new SparkException( - s"file has different length $length2 than added file ${gotten2.length()} : " + - absolutePath2) - } + if (length1 != gotten1.length()) { + throw new SparkException( + s"file has different length $length1 than added file ${gotten1.length()} : " + + absolutePath1) + } + if (length2 != gotten2.length()) { + throw new SparkException( + s"file has different length $length2 than added file ${gotten2.length()} : " + + absolutePath2) + } - if (absolutePath1 == gotten1.getAbsolutePath) { - throw new SparkException("file 
should have been copied :" + absolutePath1) - } - if (absolutePath2 == gotten2.getAbsolutePath) { - throw new SparkException("file should have been copied : " + absolutePath2) - } - x - }).count() - assert(sc.listFiles().filter(_.contains("somesuffix1")).size == 1) - } finally { - sc.stop() + if (absolutePath1 == gotten1.getAbsolutePath) { + throw new SparkException("file should have been copied :" + absolutePath1) + } + if (absolutePath2 == gotten2.getAbsolutePath) { + throw new SparkException("file should have been copied : " + absolutePath2) + } + x + }).count() + assert(sc.listFiles().filter(_.contains("somesuffix1")).size == 1) + } finally { + sc.stop() + } } } @@ -202,51 +203,51 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } test("addFile recursive works") { - val pluto = Utils.createTempDir() - val neptune = Utils.createTempDir(pluto.getAbsolutePath) - val saturn = Utils.createTempDir(neptune.getAbsolutePath) - val alien1 = File.createTempFile("alien", "1", neptune) - val alien2 = File.createTempFile("alien", "2", saturn) - - try { - sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) - sc.addFile(neptune.getAbsolutePath, true) - sc.parallelize(Array(1), 1).map(x => { - val sep = File.separator - if (!new File(SparkFiles.get(neptune.getName + sep + alien1.getName)).exists()) { - throw new SparkException("can't access file under root added directory") - } - if (!new File(SparkFiles.get(neptune.getName + sep + saturn.getName + sep + alien2.getName)) - .exists()) { - throw new SparkException("can't access file in nested directory") - } - if (new File(SparkFiles.get(pluto.getName + sep + neptune.getName + sep + alien1.getName)) - .exists()) { - throw new SparkException("file exists that shouldn't") - } - x - }).count() - } finally { - sc.stop() + withTempDir { pluto => + val neptune = Utils.createTempDir(pluto.getAbsolutePath) + val saturn = Utils.createTempDir(neptune.getAbsolutePath) + val alien1 = File.createTempFile("alien", "1", neptune) + val alien2 = File.createTempFile("alien", "2", saturn) + + try { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) + sc.addFile(neptune.getAbsolutePath, true) + sc.parallelize(Array(1), 1).map(x => { + val sep = File.separator + if (!new File(SparkFiles.get(neptune.getName + sep + alien1.getName)).exists()) { + throw new SparkException("can't access file under root added directory") + } + if (!new File(SparkFiles.get( + neptune.getName + sep + saturn.getName + sep + alien2.getName)).exists()) { + throw new SparkException("can't access file in nested directory") + } + if (new File(SparkFiles.get( + pluto.getName + sep + neptune.getName + sep + alien1.getName)).exists()) { + throw new SparkException("file exists that shouldn't") + } + x + }).count() + } finally { + sc.stop() + } } } test("addFile recursive can't add directories by default") { - val dir = Utils.createTempDir() - - try { - sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) - intercept[SparkException] { - sc.addFile(dir.getAbsolutePath) + withTempDir { dir => + try { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) + intercept[SparkException] { + sc.addFile(dir.getAbsolutePath) + } + } finally { + sc.stop() } - } finally { - sc.stop() } } test("cannot call addFile with different paths that have the same filename") { - val dir = Utils.createTempDir() - try { + withTempDir { dir => val subdir1 = new File(dir, "subdir1") val subdir2 = new 
File(dir, "subdir2") assert(subdir1.mkdir()) @@ -267,8 +268,6 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu sc.addFile(file2.getAbsolutePath) } assert(getAddedFileContents() === "old") - } finally { - Utils.deleteRecursively(dir) } } @@ -296,30 +295,33 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } test("add jar with invalid path") { - val tmpDir = Utils.createTempDir() - val tmpJar = File.createTempFile("test", ".jar", tmpDir) + withTempDir { tmpDir => + val tmpJar = File.createTempFile("test", ".jar", tmpDir) - sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) - sc.addJar(tmpJar.getAbsolutePath) + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) + sc.addJar(tmpJar.getAbsolutePath) - // Invalid jar path will only print the error log, will not add to file server. - sc.addJar("dummy.jar") - sc.addJar("") - sc.addJar(tmpDir.getAbsolutePath) + // Invalid jar path will only print the error log, will not add to file server. + sc.addJar("dummy.jar") + sc.addJar("") + sc.addJar(tmpDir.getAbsolutePath) - assert(sc.listJars().size == 1) - assert(sc.listJars().head.contains(tmpJar.getName)) + assert(sc.listJars().size == 1) + assert(sc.listJars().head.contains(tmpJar.getName)) + } } test("SPARK-22585 addJar argument without scheme is interpreted literally without url decoding") { - val tmpDir = new File(Utils.createTempDir(), "host%3A443") - tmpDir.mkdirs() - val tmpJar = File.createTempFile("t%2F", ".jar", tmpDir) + withTempDir { dir => + val tmpDir = new File(dir, "host%3A443") + tmpDir.mkdirs() + val tmpJar = File.createTempFile("t%2F", ".jar", tmpDir) - sc = new SparkContext("local", "test") + sc = new SparkContext("local", "test") - sc.addJar(tmpJar.getAbsolutePath) - assert(sc.listJars().size === 1) + sc.addJar(tmpJar.getAbsolutePath) + assert(sc.listJars().size === 1) + } } test("Cancelling job group should not cause SparkContext to shutdown (SPARK-6414)") { @@ -340,60 +342,61 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu test("Comma separated paths for newAPIHadoopFile/wholeTextFiles/binaryFiles (SPARK-7155)") { // Regression test for SPARK-7155 // dir1 and dir2 are used for wholeTextFiles and binaryFiles - val dir1 = Utils.createTempDir() - val dir2 = Utils.createTempDir() - - val dirpath1 = dir1.getAbsolutePath - val dirpath2 = dir2.getAbsolutePath - - // file1 and file2 are placed inside dir1, they are also used for - // textFile, hadoopFile, and newAPIHadoopFile - // file3, file4 and file5 are placed inside dir2, they are used for - // textFile, hadoopFile, and newAPIHadoopFile as well - val file1 = new File(dir1, "part-00000") - val file2 = new File(dir1, "part-00001") - val file3 = new File(dir2, "part-00000") - val file4 = new File(dir2, "part-00001") - val file5 = new File(dir2, "part-00002") - - val filepath1 = file1.getAbsolutePath - val filepath2 = file2.getAbsolutePath - val filepath3 = file3.getAbsolutePath - val filepath4 = file4.getAbsolutePath - val filepath5 = file5.getAbsolutePath - - - try { - // Create 5 text files. 
- Files.write("someline1 in file1\nsomeline2 in file1\nsomeline3 in file1", file1, - StandardCharsets.UTF_8) - Files.write("someline1 in file2\nsomeline2 in file2", file2, StandardCharsets.UTF_8) - Files.write("someline1 in file3", file3, StandardCharsets.UTF_8) - Files.write("someline1 in file4\nsomeline2 in file4", file4, StandardCharsets.UTF_8) - Files.write("someline1 in file2\nsomeline2 in file5", file5, StandardCharsets.UTF_8) - - sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) - - // Test textFile, hadoopFile, and newAPIHadoopFile for file1 and file2 - assert(sc.textFile(filepath1 + "," + filepath2).count() == 5L) - assert(sc.hadoopFile(filepath1 + "," + filepath2, - classOf[TextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L) - assert(sc.newAPIHadoopFile(filepath1 + "," + filepath2, - classOf[NewTextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L) - - // Test textFile, hadoopFile, and newAPIHadoopFile for file3, file4, and file5 - assert(sc.textFile(filepath3 + "," + filepath4 + "," + filepath5).count() == 5L) - assert(sc.hadoopFile(filepath3 + "," + filepath4 + "," + filepath5, - classOf[TextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L) - assert(sc.newAPIHadoopFile(filepath3 + "," + filepath4 + "," + filepath5, - classOf[NewTextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L) - - // Test wholeTextFiles, and binaryFiles for dir1 and dir2 - assert(sc.wholeTextFiles(dirpath1 + "," + dirpath2).count() == 5L) - assert(sc.binaryFiles(dirpath1 + "," + dirpath2).count() == 5L) - - } finally { - sc.stop() + withTempDir { dir1 => + withTempDir { dir2 => + val dirpath1 = dir1.getAbsolutePath + val dirpath2 = dir2.getAbsolutePath + + // file1 and file2 are placed inside dir1, they are also used for + // textFile, hadoopFile, and newAPIHadoopFile + // file3, file4 and file5 are placed inside dir2, they are used for + // textFile, hadoopFile, and newAPIHadoopFile as well + val file1 = new File(dir1, "part-00000") + val file2 = new File(dir1, "part-00001") + val file3 = new File(dir2, "part-00000") + val file4 = new File(dir2, "part-00001") + val file5 = new File(dir2, "part-00002") + + val filepath1 = file1.getAbsolutePath + val filepath2 = file2.getAbsolutePath + val filepath3 = file3.getAbsolutePath + val filepath4 = file4.getAbsolutePath + val filepath5 = file5.getAbsolutePath + + + try { + // Create 5 text files. 
+ Files.write("someline1 in file1\nsomeline2 in file1\nsomeline3 in file1", file1, + StandardCharsets.UTF_8) + Files.write("someline1 in file2\nsomeline2 in file2", file2, StandardCharsets.UTF_8) + Files.write("someline1 in file3", file3, StandardCharsets.UTF_8) + Files.write("someline1 in file4\nsomeline2 in file4", file4, StandardCharsets.UTF_8) + Files.write("someline1 in file2\nsomeline2 in file5", file5, StandardCharsets.UTF_8) + + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) + + // Test textFile, hadoopFile, and newAPIHadoopFile for file1 and file2 + assert(sc.textFile(filepath1 + "," + filepath2).count() == 5L) + assert(sc.hadoopFile(filepath1 + "," + filepath2, + classOf[TextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L) + assert(sc.newAPIHadoopFile(filepath1 + "," + filepath2, + classOf[NewTextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L) + + // Test textFile, hadoopFile, and newAPIHadoopFile for file3, file4, and file5 + assert(sc.textFile(filepath3 + "," + filepath4 + "," + filepath5).count() == 5L) + assert(sc.hadoopFile(filepath3 + "," + filepath4 + "," + filepath5, + classOf[TextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L) + assert(sc.newAPIHadoopFile(filepath3 + "," + filepath4 + "," + filepath5, + classOf[NewTextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L) + + // Test wholeTextFiles, and binaryFiles for dir1 and dir2 + assert(sc.wholeTextFiles(dirpath1 + "," + dirpath2).count() == 5L) + assert(sc.binaryFiles(dirpath1 + "," + dirpath2).count() == 5L) + + } finally { + sc.stop() + } + } } } diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala index 31289026b002..dad24d7c01b8 100644 --- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala @@ -23,7 +23,7 @@ import java.io.File import org.scalatest.{BeforeAndAfterAll, FunSuite, Outcome} import org.apache.spark.internal.Logging -import org.apache.spark.util.AccumulatorContext +import org.apache.spark.util.{AccumulatorContext, Utils} /** * Base abstract class for all unit tests in Spark for handling common functionality. @@ -106,4 +106,14 @@ abstract class SparkFunSuite } } + /** + * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` + * returns. + */ + protected def withTempDir(f: File => Unit): Unit = { + val dir = Utils.createTempDir() + try f(dir) finally { + Utils.deleteRecursively(dir) + } + } } diff --git a/core/src/test/scala/org/apache/spark/api/python/PythonBroadcastSuite.scala b/core/src/test/scala/org/apache/spark/api/python/PythonBroadcastSuite.scala index b38a3667abee..7407a656dbfc 100644 --- a/core/src/test/scala/org/apache/spark/api/python/PythonBroadcastSuite.scala +++ b/core/src/test/scala/org/apache/spark/api/python/PythonBroadcastSuite.scala @@ -31,7 +31,6 @@ import org.apache.spark.util.Utils // a PythonBroadcast: class PythonBroadcastSuite extends SparkFunSuite with Matchers with SharedSparkContext { test("PythonBroadcast can be serialized with Kryo (SPARK-4882)") { - val tempDir = Utils.createTempDir() val broadcastedString = "Hello, world!" 
def assertBroadcastIsValid(broadcast: PythonBroadcast): Unit = { val source = Source.fromFile(broadcast.path) @@ -39,7 +38,7 @@ class PythonBroadcastSuite extends SparkFunSuite with Matchers with SharedSparkC source.close() contents should be (broadcastedString) } - try { + withTempDir { tempDir => val broadcastDataFile: File = { val file = new File(tempDir, "broadcastData") val printWriter = new PrintWriter(file) @@ -53,8 +52,6 @@ class PythonBroadcastSuite extends SparkFunSuite with Matchers with SharedSparkC val deserializedBroadcast = Utils.clone[PythonBroadcast](broadcast, new KryoSerializer(conf).newInstance()) assertBroadcastIsValid(deserializedBroadcast) - } finally { - Utils.deleteRecursively(tempDir) } } } diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index c093789244bf..a8973d1b60f8 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -494,13 +494,11 @@ class SparkSubmitSuite } test("launch simple application with spark-submit with redaction") { - val testDir = Utils.createTempDir() - testDir.deleteOnExit() - val testDirPath = new Path(testDir.getAbsolutePath()) val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val fileSystem = Utils.getHadoopFileSystem("/", SparkHadoopUtil.get.newConfiguration(new SparkConf())) - try { + withTempDir { testDir => + val testDirPath = new Path(testDir.getAbsolutePath()) val args = Seq( "--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"), "--name", "testApp", @@ -519,8 +517,6 @@ class SparkSubmitSuite Source.fromInputStream(logData).getLines().foreach { line => assert(!line.contains("secret_password")) } - } finally { - Utils.deleteRecursively(testDir) } } @@ -614,108 +610,112 @@ class SparkSubmitSuite assert(new File(rScriptDir).exists) // compile a small jar containing a class that will be called from R code. - val tempDir = Utils.createTempDir() - val srcDir = new File(tempDir, "sparkrtest") - srcDir.mkdirs() - val excSource = new JavaSourceFromString(new File(srcDir, "DummyClass").toURI.getPath, - """package sparkrtest; + withTempDir { tempDir => + val srcDir = new File(tempDir, "sparkrtest") + srcDir.mkdirs() + val excSource = new JavaSourceFromString(new File(srcDir, "DummyClass").toURI.getPath, + """package sparkrtest; | |public class DummyClass implements java.io.Serializable { | public static String helloWorld(String arg) { return "Hello " + arg; } | public static int addStuff(int arg1, int arg2) { return arg1 + arg2; } |} - """.stripMargin) - val excFile = TestUtils.createCompiledClass("DummyClass", srcDir, excSource, Seq.empty) - val jarFile = new File(tempDir, "sparkRTestJar-%s.jar".format(System.currentTimeMillis())) - val jarURL = TestUtils.createJar(Seq(excFile), jarFile, directoryPrefix = Some("sparkrtest")) + """. 
+ stripMargin) + val excFile = TestUtils.createCompiledClass("DummyClass", srcDir, excSource, Seq.empty) + val jarFile = new File(tempDir, "sparkRTestJar-%s.jar".format(System.currentTimeMillis())) + val jarURL = TestUtils.createJar(Seq(excFile), jarFile, directoryPrefix = Some("sparkrtest")) - val args = Seq( - "--name", "testApp", - "--master", "local", - "--jars", jarURL.toString, - "--verbose", - "--conf", "spark.ui.enabled=false", - rScriptDir) - runSparkSubmit(args) + val args = Seq( + "--name", "testApp", + "--master", "local", + "--jars", jarURL.toString, + "--verbose", + "--conf", "spark.ui.enabled=false", + rScriptDir) + runSparkSubmit(args) + } } test("resolves command line argument paths correctly") { - val dir = Utils.createTempDir() - val archive = Paths.get(dir.toPath.toString, "single.zip") - Files.createFile(archive) - val jars = "/jar1,/jar2" - val files = "local:/file1,file2" - val archives = s"file:/archive1,${dir.toPath.toAbsolutePath.toString}/*.zip#archive3" - val pyFiles = "py-file1,py-file2" - - // Test jars and files - val clArgs = Seq( - "--master", "local", - "--class", "org.SomeClass", - "--jars", jars, - "--files", files, - "thejar.jar") - val appArgs = new SparkSubmitArguments(clArgs) - val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) - appArgs.jars should be (Utils.resolveURIs(jars)) - appArgs.files should be (Utils.resolveURIs(files)) - conf.get("spark.jars") should be (Utils.resolveURIs(jars + ",thejar.jar")) - conf.get("spark.files") should be (Utils.resolveURIs(files)) - - // Test files and archives (Yarn) - val clArgs2 = Seq( - "--master", "yarn", - "--class", "org.SomeClass", - "--files", files, - "--archives", archives, - "thejar.jar" - ) - val appArgs2 = new SparkSubmitArguments(clArgs2) - val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2) - appArgs2.files should be (Utils.resolveURIs(files)) - appArgs2.archives should fullyMatch regex ("file:/archive1,file:.*#archive3") - conf2.get("spark.yarn.dist.files") should be (Utils.resolveURIs(files)) - conf2.get("spark.yarn.dist.archives") should fullyMatch regex - ("file:/archive1,file:.*#archive3") - - // Test python files - val clArgs3 = Seq( - "--master", "local", - "--py-files", pyFiles, - "--conf", "spark.pyspark.driver.python=python3.4", - "--conf", "spark.pyspark.python=python3.5", - "mister.py" - ) - val appArgs3 = new SparkSubmitArguments(clArgs3) - val (_, _, conf3, _) = submit.prepareSubmitEnvironment(appArgs3) - appArgs3.pyFiles should be (Utils.resolveURIs(pyFiles)) - conf3.get("spark.submit.pyFiles") should be ( - PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(",")) - conf3.get(PYSPARK_DRIVER_PYTHON.key) should be ("python3.4") - conf3.get(PYSPARK_PYTHON.key) should be ("python3.5") + withTempDir { dir => + val archive = Paths.get(dir.toPath.toString, "single.zip") + Files.createFile(archive) + val jars = "/jar1,/jar2" + val files = "local:/file1,file2" + val archives = s"file:/archive1,${dir.toPath.toAbsolutePath.toString}/*.zip#archive3" + val pyFiles = "py-file1,py-file2" + + // Test jars and files + val clArgs = Seq( + "--master", "local", + "--class", "org.SomeClass", + "--jars", jars, + "--files", files, + "thejar.jar") + val appArgs = new SparkSubmitArguments(clArgs) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) + appArgs.jars should be(Utils.resolveURIs(jars)) + appArgs.files should be(Utils.resolveURIs(files)) + conf.get("spark.jars") should be(Utils.resolveURIs(jars + ",thejar.jar")) + conf.get("spark.files") should 
be(Utils.resolveURIs(files)) + + // Test files and archives (Yarn) + val clArgs2 = Seq( + "--master", "yarn", + "--class", "org.SomeClass", + "--files", files, + "--archives", archives, + "thejar.jar" + ) + val appArgs2 = new SparkSubmitArguments(clArgs2) + val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2) + appArgs2.files should be(Utils.resolveURIs(files)) + appArgs2.archives should fullyMatch regex ("file:/archive1,file:.*#archive3") + conf2.get("spark.yarn.dist.files") should be(Utils.resolveURIs(files)) + conf2.get("spark.yarn.dist.archives") should fullyMatch regex + ("file:/archive1,file:.*#archive3") + + // Test python files + val clArgs3 = Seq( + "--master", "local", + "--py-files", pyFiles, + "--conf", "spark.pyspark.driver.python=python3.4", + "--conf", "spark.pyspark.python=python3.5", + "mister.py" + ) + val appArgs3 = new SparkSubmitArguments(clArgs3) + val (_, _, conf3, _) = submit.prepareSubmitEnvironment(appArgs3) + appArgs3.pyFiles should be(Utils.resolveURIs(pyFiles)) + conf3.get("spark.submit.pyFiles") should be( + PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(",")) + conf3.get(PYSPARK_DRIVER_PYTHON.key) should be("python3.4") + conf3.get(PYSPARK_PYTHON.key) should be("python3.5") + } } test("ambiguous archive mapping results in error message") { - val dir = Utils.createTempDir() - val archive1 = Paths.get(dir.toPath.toString, "first.zip") - val archive2 = Paths.get(dir.toPath.toString, "second.zip") - Files.createFile(archive1) - Files.createFile(archive2) - val jars = "/jar1,/jar2" - val files = "local:/file1,file2" - val archives = s"file:/archive1,${dir.toPath.toAbsolutePath.toString}/*.zip#archive3" - val pyFiles = "py-file1,py-file2" - - // Test files and archives (Yarn) - val clArgs2 = Seq( - "--master", "yarn", - "--class", "org.SomeClass", - "--files", files, - "--archives", archives, - "thejar.jar" - ) + withTempDir { dir => + val archive1 = Paths.get(dir.toPath.toString, "first.zip") + val archive2 = Paths.get(dir.toPath.toString, "second.zip") + Files.createFile(archive1) + Files.createFile(archive2) + val jars = "/jar1,/jar2" + val files = "local:/file1,file2" + val archives = s"file:/archive1,${dir.toPath.toAbsolutePath.toString}/*.zip#archive3" + val pyFiles = "py-file1,py-file2" + + // Test files and archives (Yarn) + val clArgs2 = Seq( + "--master", "yarn", + "--class", "org.SomeClass", + "--files", files, + "--archives", archives, + "thejar.jar" + ) - testPrematureExit(clArgs2.toArray, "resolves ambiguously to multiple files") + testPrematureExit(clArgs2.toArray, "resolves ambiguously to multiple files") + } } test("resolves config paths correctly") { @@ -724,77 +724,77 @@ class SparkSubmitSuite val archives = "file:/archive1,archive2" // spark.yarn.dist.archives val pyFiles = "py-file1,py-file2" // spark.submit.pyFiles - val tmpDir = Utils.createTempDir() - - // Test jars and files - val f1 = File.createTempFile("test-submit-jars-files", "", tmpDir) - val writer1 = new PrintWriter(f1) - writer1.println("spark.jars " + jars) - writer1.println("spark.files " + files) - writer1.close() - val clArgs = Seq( - "--master", "local", - "--class", "org.SomeClass", - "--properties-file", f1.getPath, - "thejar.jar" - ) - val appArgs = new SparkSubmitArguments(clArgs) - val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) - conf.get("spark.jars") should be(Utils.resolveURIs(jars + ",thejar.jar")) - conf.get("spark.files") should be(Utils.resolveURIs(files)) - - // Test files and archives (Yarn) - val f2 = 
File.createTempFile("test-submit-files-archives", "", tmpDir) - val writer2 = new PrintWriter(f2) - writer2.println("spark.yarn.dist.files " + files) - writer2.println("spark.yarn.dist.archives " + archives) - writer2.close() - val clArgs2 = Seq( - "--master", "yarn", - "--class", "org.SomeClass", - "--properties-file", f2.getPath, - "thejar.jar" - ) - val appArgs2 = new SparkSubmitArguments(clArgs2) - val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2) - conf2.get("spark.yarn.dist.files") should be(Utils.resolveURIs(files)) - conf2.get("spark.yarn.dist.archives") should be(Utils.resolveURIs(archives)) - - // Test python files - val f3 = File.createTempFile("test-submit-python-files", "", tmpDir) - val writer3 = new PrintWriter(f3) - writer3.println("spark.submit.pyFiles " + pyFiles) - writer3.close() - val clArgs3 = Seq( - "--master", "local", - "--properties-file", f3.getPath, - "mister.py" - ) - val appArgs3 = new SparkSubmitArguments(clArgs3) - val (_, _, conf3, _) = submit.prepareSubmitEnvironment(appArgs3) - conf3.get("spark.submit.pyFiles") should be( - PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(",")) - - // Test remote python files - val hadoopConf = new Configuration() - updateConfWithFakeS3Fs(hadoopConf) - val f4 = File.createTempFile("test-submit-remote-python-files", "", tmpDir) - val pyFile1 = File.createTempFile("file1", ".py", tmpDir) - val pyFile2 = File.createTempFile("file2", ".py", tmpDir) - val writer4 = new PrintWriter(f4) - val remotePyFiles = s"s3a://${pyFile1.getAbsolutePath},s3a://${pyFile2.getAbsolutePath}" - writer4.println("spark.submit.pyFiles " + remotePyFiles) - writer4.close() - val clArgs4 = Seq( - "--master", "yarn", - "--deploy-mode", "cluster", - "--properties-file", f4.getPath, - "hdfs:///tmp/mister.py" - ) - val appArgs4 = new SparkSubmitArguments(clArgs4) - val (_, _, conf4, _) = submit.prepareSubmitEnvironment(appArgs4, conf = Some(hadoopConf)) - // Should not format python path for yarn cluster mode - conf4.get("spark.submit.pyFiles") should be(Utils.resolveURIs(remotePyFiles)) + withTempDir { tmpDir => + // Test jars and files + val f1 = File.createTempFile("test-submit-jars-files", "", tmpDir) + val writer1 = new PrintWriter(f1) + writer1.println("spark.jars " + jars) + writer1.println("spark.files " + files) + writer1.close() + val clArgs = Seq( + "--master", "local", + "--class", "org.SomeClass", + "--properties-file", f1.getPath, + "thejar.jar" + ) + val appArgs = new SparkSubmitArguments(clArgs) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) + conf.get("spark.jars") should be(Utils.resolveURIs(jars + ",thejar.jar")) + conf.get("spark.files") should be(Utils.resolveURIs(files)) + + // Test files and archives (Yarn) + val f2 = File.createTempFile("test-submit-files-archives", "", tmpDir) + val writer2 = new PrintWriter(f2) + writer2.println("spark.yarn.dist.files " + files) + writer2.println("spark.yarn.dist.archives " + archives) + writer2.close() + val clArgs2 = Seq( + "--master", "yarn", + "--class", "org.SomeClass", + "--properties-file", f2.getPath, + "thejar.jar" + ) + val appArgs2 = new SparkSubmitArguments(clArgs2) + val (_, _, conf2, _) = submit.prepareSubmitEnvironment(appArgs2) + conf2.get("spark.yarn.dist.files") should be(Utils.resolveURIs(files)) + conf2.get("spark.yarn.dist.archives") should be(Utils.resolveURIs(archives)) + + // Test python files + val f3 = File.createTempFile("test-submit-python-files", "", tmpDir) + val writer3 = new PrintWriter(f3) + 
writer3.println("spark.submit.pyFiles " + pyFiles) + writer3.close() + val clArgs3 = Seq( + "--master", "local", + "--properties-file", f3.getPath, + "mister.py" + ) + val appArgs3 = new SparkSubmitArguments(clArgs3) + val (_, _, conf3, _) = submit.prepareSubmitEnvironment(appArgs3) + conf3.get("spark.submit.pyFiles") should be( + PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(",")) + + // Test remote python files + val hadoopConf = new Configuration() + updateConfWithFakeS3Fs(hadoopConf) + val f4 = File.createTempFile("test-submit-remote-python-files", "", tmpDir) + val pyFile1 = File.createTempFile("file1", ".py", tmpDir) + val pyFile2 = File.createTempFile("file2", ".py", tmpDir) + val writer4 = new PrintWriter(f4) + val remotePyFiles = s"s3a://${pyFile1.getAbsolutePath},s3a://${pyFile2.getAbsolutePath}" + writer4.println("spark.submit.pyFiles " + remotePyFiles) + writer4.close() + val clArgs4 = Seq( + "--master", "yarn", + "--deploy-mode", "cluster", + "--properties-file", f4.getPath, + "hdfs:///tmp/mister.py" + ) + val appArgs4 = new SparkSubmitArguments(clArgs4) + val (_, _, conf4, _) = submit.prepareSubmitEnvironment(appArgs4, conf = Some(hadoopConf)) + // Should not format python path for yarn cluster mode + conf4.get("spark.submit.pyFiles") should be(Utils.resolveURIs(remotePyFiles)) + } } test("user classpath first in driver") { @@ -828,46 +828,50 @@ class SparkSubmitSuite } test("support glob path") { - val tmpJarDir = Utils.createTempDir() - val jar1 = TestUtils.createJarWithFiles(Map("test.resource" -> "1"), tmpJarDir) - val jar2 = TestUtils.createJarWithFiles(Map("test.resource" -> "USER"), tmpJarDir) - - val tmpFileDir = Utils.createTempDir() - val file1 = File.createTempFile("tmpFile1", "", tmpFileDir) - val file2 = File.createTempFile("tmpFile2", "", tmpFileDir) - - val tmpPyFileDir = Utils.createTempDir() - val pyFile1 = File.createTempFile("tmpPy1", ".py", tmpPyFileDir) - val pyFile2 = File.createTempFile("tmpPy2", ".egg", tmpPyFileDir) - - val tmpArchiveDir = Utils.createTempDir() - val archive1 = File.createTempFile("archive1", ".zip", tmpArchiveDir) - val archive2 = File.createTempFile("archive2", ".zip", tmpArchiveDir) - - val tempPyFile = File.createTempFile("tmpApp", ".py") - tempPyFile.deleteOnExit() - - val args = Seq( - "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), - "--name", "testApp", - "--master", "yarn", - "--deploy-mode", "client", - "--jars", s"${tmpJarDir.getAbsolutePath}/*.jar", - "--files", s"${tmpFileDir.getAbsolutePath}/tmpFile*", - "--py-files", s"${tmpPyFileDir.getAbsolutePath}/tmpPy*", - "--archives", s"${tmpArchiveDir.getAbsolutePath}/*.zip", - tempPyFile.toURI().toString()) - - val appArgs = new SparkSubmitArguments(args) - val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) - conf.get("spark.yarn.dist.jars").split(",").toSet should be - (Set(jar1.toURI.toString, jar2.toURI.toString)) - conf.get("spark.yarn.dist.files").split(",").toSet should be - (Set(file1.toURI.toString, file2.toURI.toString)) - conf.get("spark.yarn.dist.pyFiles").split(",").toSet should be - (Set(pyFile1.getAbsolutePath, pyFile2.getAbsolutePath)) - conf.get("spark.yarn.dist.archives").split(",").toSet should be - (Set(archive1.toURI.toString, archive2.toURI.toString)) + withTempDir { tmpJarDir => + withTempDir { tmpFileDir => + withTempDir { tmpPyFileDir => + withTempDir { tmpArchiveDir => + val jar1 = TestUtils.createJarWithFiles(Map("test.resource" -> "1"), tmpJarDir) + val jar2 = 
TestUtils.createJarWithFiles(Map("test.resource" -> "USER"), tmpJarDir) + + val file1 = File.createTempFile("tmpFile1", "", tmpFileDir) + val file2 = File.createTempFile("tmpFile2", "", tmpFileDir) + + val pyFile1 = File.createTempFile("tmpPy1", ".py", tmpPyFileDir) + val pyFile2 = File.createTempFile("tmpPy2", ".egg", tmpPyFileDir) + + val archive1 = File.createTempFile("archive1", ".zip", tmpArchiveDir) + val archive2 = File.createTempFile("archive2", ".zip", tmpArchiveDir) + + val tempPyFile = File.createTempFile("tmpApp", ".py") + tempPyFile.deleteOnExit() + + val args = Seq( + "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), + "--name", "testApp", + "--master", "yarn", + "--deploy-mode", "client", + "--jars", s"${tmpJarDir.getAbsolutePath}/*.jar", + "--files", s"${tmpFileDir.getAbsolutePath}/tmpFile*", + "--py-files", s"${tmpPyFileDir.getAbsolutePath}/tmpPy*", + "--archives", s"${tmpArchiveDir.getAbsolutePath}/*.zip", + tempPyFile.toURI().toString()) + + val appArgs = new SparkSubmitArguments(args) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) + conf.get("spark.yarn.dist.jars").split(",").toSet should be + (Set(jar1.toURI.toString, jar2.toURI.toString)) + conf.get("spark.yarn.dist.files").split(",").toSet should be + (Set(file1.toURI.toString, file2.toURI.toString)) + conf.get("spark.yarn.dist.pyFiles").split(",").toSet should be + (Set(pyFile1.getAbsolutePath, pyFile2.getAbsolutePath)) + conf.get("spark.yarn.dist.archives").split(",").toSet should be + (Set(archive1.toURI.toString, archive2.toURI.toString)) + } + } + } + } } // scalastyle:on println @@ -985,37 +989,38 @@ class SparkSubmitSuite val hadoopConf = new Configuration() updateConfWithFakeS3Fs(hadoopConf) - val tmpDir = Utils.createTempDir() - val file = File.createTempFile("tmpFile", "", tmpDir) - val pyFile = File.createTempFile("tmpPy", ".egg", tmpDir) - val mainResource = File.createTempFile("tmpPy", ".py", tmpDir) - val tmpJar = TestUtils.createJarWithFiles(Map("test.resource" -> "USER"), tmpDir) - val tmpJarPath = s"s3a://${new File(tmpJar.toURI).getAbsolutePath}" + withTempDir { tmpDir => + val file = File.createTempFile("tmpFile", "", tmpDir) + val pyFile = File.createTempFile("tmpPy", ".egg", tmpDir) + val mainResource = File.createTempFile("tmpPy", ".py", tmpDir) + val tmpJar = TestUtils.createJarWithFiles(Map("test.resource" -> "USER"), tmpDir) + val tmpJarPath = s"s3a://${new File(tmpJar.toURI).getAbsolutePath}" - val args = Seq( - "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), - "--name", "testApp", - "--master", "yarn", - "--deploy-mode", "client", - "--jars", tmpJarPath, - "--files", s"s3a://${file.getAbsolutePath}", - "--py-files", s"s3a://${pyFile.getAbsolutePath}", - s"s3a://$mainResource" + val args = Seq( + "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), + "--name", "testApp", + "--master", "yarn", + "--deploy-mode", "client", + "--jars", tmpJarPath, + "--files", s"s3a://${file.getAbsolutePath}", + "--py-files", s"s3a://${pyFile.getAbsolutePath}", + s"s3a://$mainResource" ) - val appArgs = new SparkSubmitArguments(args) - val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs, conf = Some(hadoopConf)) + val appArgs = new SparkSubmitArguments(args) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs, conf = Some(hadoopConf)) - // All the resources should still be remote paths, so that YARN client will not upload again. 
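(The "support glob path" test above ends up nesting four withTempDir blocks, one per directory it needs. Purely as an illustrative sketch built on the same Utils calls, and not part of this patch, a variadic variant could flatten that nesting:)

import java.io.File

import org.apache.spark.util.Utils

trait MultiTempDirs {
  // Hypothetical helper, not in SparkFunSuite: creates n temporary directories,
  // passes them to f, and deletes each of them recursively once f returns.
  protected def withTempDirs(n: Int)(f: Seq[File] => Unit): Unit = {
    val dirs = Seq.fill(n)(Utils.createTempDir())
    try f(dirs) finally dirs.foreach(Utils.deleteRecursively)
  }
}

// Usage corresponding to the test above:
//   withTempDirs(4) { case Seq(tmpJarDir, tmpFileDir, tmpPyFileDir, tmpArchiveDir) => ... }
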
- conf.get("spark.yarn.dist.jars") should be (tmpJarPath) - conf.get("spark.yarn.dist.files") should be (s"s3a://${file.getAbsolutePath}") - conf.get("spark.yarn.dist.pyFiles") should be (s"s3a://${pyFile.getAbsolutePath}") + // All the resources should still be remote paths, so that YARN client will not upload again. + conf.get("spark.yarn.dist.jars") should be(tmpJarPath) + conf.get("spark.yarn.dist.files") should be(s"s3a://${file.getAbsolutePath}") + conf.get("spark.yarn.dist.pyFiles") should be(s"s3a://${pyFile.getAbsolutePath}") - // Local repl jars should be a local path. - conf.get("spark.repl.local.jars") should (startWith("file:")) + // Local repl jars should be a local path. + conf.get("spark.repl.local.jars") should (startWith("file:")) - // local py files should not be a URI format. - conf.get("spark.submit.pyFiles") should (startWith("/")) + // local py files should not be a URI format. + conf.get("spark.submit.pyFiles") should (startWith("/")) + } } test("download remote resource if it is not supported by yarn service") { @@ -1095,18 +1100,13 @@ class SparkSubmitSuite } private def forConfDir(defaults: Map[String, String]) (f: String => Unit) = { - val tmpDir = Utils.createTempDir() - - val defaultsConf = new File(tmpDir.getAbsolutePath, "spark-defaults.conf") - val writer = new OutputStreamWriter(new FileOutputStream(defaultsConf), StandardCharsets.UTF_8) - for ((key, value) <- defaults) writer.write(s"$key $value\n") - - writer.close() - - try { + withTempDir { tmpDir => + val defaultsConf = new File(tmpDir.getAbsolutePath, "spark-defaults.conf") + val writer = + new OutputStreamWriter(new FileOutputStream(defaultsConf), StandardCharsets.UTF_8) + for ((key, value) <- defaults) writer.write(s"$key $value\n") + writer.close() f(tmpDir.getAbsolutePath) - } finally { - Utils.deleteRecursively(tmpDir) } } @@ -1134,39 +1134,40 @@ class SparkSubmitSuite val hadoopConf = new Configuration() updateConfWithFakeS3Fs(hadoopConf) - val tmpDir = Utils.createTempDir() - val pyFile = File.createTempFile("tmpPy", ".egg", tmpDir) + withTempDir { tmpDir => + val pyFile = File.createTempFile("tmpPy", ".egg", tmpDir) - val args = Seq( - "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), - "--name", "testApp", - "--master", "yarn", - "--deploy-mode", "client", - "--py-files", s"s3a://${pyFile.getAbsolutePath}", - "spark-internal" - ) + val args = Seq( + "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), + "--name", "testApp", + "--master", "yarn", + "--deploy-mode", "client", + "--py-files", s"s3a://${pyFile.getAbsolutePath}", + "spark-internal" + ) - val appArgs = new SparkSubmitArguments(args) - val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs, conf = Some(hadoopConf)) + val appArgs = new SparkSubmitArguments(args) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs, conf = Some(hadoopConf)) - conf.get(PY_FILES.key) should be (s"s3a://${pyFile.getAbsolutePath}") - conf.get("spark.submit.pyFiles") should (startWith("/")) + conf.get(PY_FILES.key) should be(s"s3a://${pyFile.getAbsolutePath}") + conf.get("spark.submit.pyFiles") should (startWith("/")) - // Verify "spark.submit.pyFiles" - val args1 = Seq( - "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), - "--name", "testApp", - "--master", "yarn", - "--deploy-mode", "client", - "--conf", s"spark.submit.pyFiles=s3a://${pyFile.getAbsolutePath}", - "spark-internal" - ) + // Verify "spark.submit.pyFiles" + val args1 = Seq( + "--class", 
UserClasspathFirstTest.getClass.getName.stripPrefix("$"), + "--name", "testApp", + "--master", "yarn", + "--deploy-mode", "client", + "--conf", s"spark.submit.pyFiles=s3a://${pyFile.getAbsolutePath}", + "spark-internal" + ) - val appArgs1 = new SparkSubmitArguments(args1) - val (_, _, conf1, _) = submit.prepareSubmitEnvironment(appArgs1, conf = Some(hadoopConf)) + val appArgs1 = new SparkSubmitArguments(args1) + val (_, _, conf1, _) = submit.prepareSubmitEnvironment(appArgs1, conf = Some(hadoopConf)) - conf1.get(PY_FILES.key) should be (s"s3a://${pyFile.getAbsolutePath}") - conf1.get("spark.submit.pyFiles") should (startWith("/")) + conf1.get(PY_FILES.key) should be(s"s3a://${pyFile.getAbsolutePath}") + conf1.get("spark.submit.pyFiles") should (startWith("/")) + } } test("handles natural line delimiters in --properties-file and --conf uniformly") { diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 527c654a7cd6..c1ae27aa940f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -767,53 +767,54 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc } test("clean up stale app information") { - val storeDir = Utils.createTempDir() - val conf = createTestConf().set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) - val clock = new ManualClock() - val provider = spy(new FsHistoryProvider(conf, clock)) - val appId = "new1" - - // Write logs for two app attempts. - clock.advance(1) - val attempt1 = newLogFile(appId, Some("1"), inProgress = false) - writeFile(attempt1, true, None, - SparkListenerApplicationStart(appId, Some(appId), 1L, "test", Some("1")), - SparkListenerJobStart(0, 1L, Nil, null), - SparkListenerApplicationEnd(5L) + withTempDir { storeDir => + val conf = createTestConf().set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) + val clock = new ManualClock() + val provider = spy(new FsHistoryProvider(conf, clock)) + val appId = "new1" + + // Write logs for two app attempts. + clock.advance(1) + val attempt1 = newLogFile(appId, Some("1"), inProgress = false) + writeFile(attempt1, true, None, + SparkListenerApplicationStart(appId, Some(appId), 1L, "test", Some("1")), + SparkListenerJobStart(0, 1L, Nil, null), + SparkListenerApplicationEnd(5L) ) - val attempt2 = newLogFile(appId, Some("2"), inProgress = false) - writeFile(attempt2, true, None, - SparkListenerApplicationStart(appId, Some(appId), 1L, "test", Some("2")), - SparkListenerJobStart(0, 1L, Nil, null), - SparkListenerApplicationEnd(5L) + val attempt2 = newLogFile(appId, Some("2"), inProgress = false) + writeFile(attempt2, true, None, + SparkListenerApplicationStart(appId, Some(appId), 1L, "test", Some("2")), + SparkListenerJobStart(0, 1L, Nil, null), + SparkListenerApplicationEnd(5L) ) - updateAndCheck(provider) { list => - assert(list.size === 1) - assert(list(0).id === appId) - assert(list(0).attempts.size === 2) - } - - // Load the app's UI. - val ui = provider.getAppUI(appId, Some("1")) - assert(ui.isDefined) - - // Delete the underlying log file for attempt 1 and rescan. The UI should go away, but since - // attempt 2 still exists, listing data should be there. 
- clock.advance(1) - attempt1.delete() - updateAndCheck(provider) { list => - assert(list.size === 1) - assert(list(0).id === appId) - assert(list(0).attempts.size === 1) - } - assert(!ui.get.valid) - assert(provider.getAppUI(appId, None) === None) + updateAndCheck(provider) { list => + assert(list.size === 1) + assert(list(0).id === appId) + assert(list(0).attempts.size === 2) + } - // Delete the second attempt's log file. Now everything should go away. - clock.advance(1) - attempt2.delete() - updateAndCheck(provider) { list => - assert(list.isEmpty) + // Load the app's UI. + val ui = provider.getAppUI(appId, Some("1")) + assert(ui.isDefined) + + // Delete the underlying log file for attempt 1 and rescan. The UI should go away, but since + // attempt 2 still exists, listing data should be there. + clock.advance(1) + attempt1.delete() + updateAndCheck(provider) { list => + assert(list.size === 1) + assert(list(0).id === appId) + assert(list(0).attempts.size === 1) + } + assert(!ui.get.valid) + assert(provider.getAppUI(appId, None) === None) + + // Delete the second attempt's log file. Now everything should go away. + clock.advance(1) + attempt2.delete() + updateAndCheck(provider) { list => + assert(list.isEmpty) + } } } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala index 37954826af90..e89733a144cf 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala @@ -41,18 +41,14 @@ class HistoryServerArgumentsSuite extends SparkFunSuite { } test("Properties File Arguments Parsing --properties-file") { - val tmpDir = Utils.createTempDir() - val outFile = File.createTempFile("test-load-spark-properties", "test", tmpDir) - try { + withTempDir { tmpDir => + val outFile = File.createTempFile("test-load-spark-properties", "test", tmpDir) Files.write("spark.test.CustomPropertyA blah\n" + "spark.test.CustomPropertyB notblah\n", outFile, UTF_8) val argStrings = Array("--properties-file", outFile.getAbsolutePath) val hsa = new HistoryServerArguments(conf, argStrings) assert(conf.get("spark.test.CustomPropertyA") === "blah") assert(conf.get("spark.test.CustomPropertyB") === "notblah") - } finally { - Utils.deleteRecursively(tmpDir) } } - } diff --git a/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineSuite.scala index 62fe0eaedfd2..30278655dbe0 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineSuite.scala @@ -31,14 +31,11 @@ import org.apache.spark.util.Utils class PersistenceEngineSuite extends SparkFunSuite { test("FileSystemPersistenceEngine") { - val dir = Utils.createTempDir() - try { + withTempDir { dir => val conf = new SparkConf() testPersistenceEngine(conf, serializer => new FileSystemPersistenceEngine(dir.getAbsolutePath, serializer) ) - } finally { - Utils.deleteRecursively(dir) } } diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala index 817dc082b7d3..576ca1613f75 100644 --- a/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala @@ -59,9 +59,7 @@ class WholeTextFileInputFormatSuite extends SparkFunSuite with BeforeAndAfterAll test("for small files minimum split size per node and per rack should be less than or equal to " + "maximum split size.") { - var dir : File = null; - try { - dir = Utils.createTempDir() + withTempDir { dir => logInfo(s"Local disk address is ${dir.toString}.") // Set the minsize per node and rack to be larger than the size of the input file. @@ -75,8 +73,6 @@ class WholeTextFileInputFormatSuite extends SparkFunSuite with BeforeAndAfterAll } // ensure spark job runs successfully without exceptions from the CombineFileInputFormat assert(sc.wholeTextFiles(dir.toString).count == 3) - } finally { - Utils.deleteRecursively(dir) } } } diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala index ddf73d637063..47552916adb2 100644 --- a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala @@ -89,52 +89,50 @@ class WholeTextFileRecordReaderSuite extends SparkFunSuite with BeforeAndAfterAl * 3) Does the contents be the same. */ test("Correctness of WholeTextFileRecordReader.") { - val dir = Utils.createTempDir() - logInfo(s"Local disk address is ${dir.toString}.") + withTempDir { dir => + logInfo(s"Local disk address is ${dir.toString}.") - WholeTextFileRecordReaderSuite.files.foreach { case (filename, contents) => - createNativeFile(dir, filename, contents, false) - } + WholeTextFileRecordReaderSuite.files.foreach { case (filename, contents) => + createNativeFile(dir, filename, contents, false) + } - val res = sc.wholeTextFiles(dir.toString, 3).collect() + val res = sc.wholeTextFiles(dir.toString, 3).collect() - assert(res.size === WholeTextFileRecordReaderSuite.fileNames.size, - "Number of files read out does not fit with the actual value.") + assert(res.size === WholeTextFileRecordReaderSuite.fileNames.size, + "Number of files read out does not fit with the actual value.") - for ((filename, contents) <- res) { - val shortName = filename.split('/').last - assert(WholeTextFileRecordReaderSuite.fileNames.contains(shortName), - s"Missing file name $filename.") - assert(contents === new Text(WholeTextFileRecordReaderSuite.files(shortName)).toString, - s"file $filename contents can not match.") + for ((filename, contents) <- res) { + val shortName = filename.split('/').last + assert(WholeTextFileRecordReaderSuite.fileNames.contains(shortName), + s"Missing file name $filename.") + assert(contents === new Text(WholeTextFileRecordReaderSuite.files(shortName)).toString, + s"file $filename contents can not match.") + } } - - Utils.deleteRecursively(dir) } test("Correctness of WholeTextFileRecordReader with GzipCodec.") { - val dir = Utils.createTempDir() - logInfo(s"Local disk address is ${dir.toString}.") + withTempDir { dir => + logInfo(s"Local disk address is ${dir.toString}.") - WholeTextFileRecordReaderSuite.files.foreach { case (filename, contents) => - createNativeFile(dir, filename, contents, true) - } + WholeTextFileRecordReaderSuite.files.foreach { case (filename, contents) => + createNativeFile(dir, filename, contents, true) + } - val res = sc.wholeTextFiles(dir.toString, 3).collect() + val res = sc.wholeTextFiles(dir.toString, 3).collect() - assert(res.size === 
WholeTextFileRecordReaderSuite.fileNames.size, - "Number of files read out does not fit with the actual value.") + assert(res.size === WholeTextFileRecordReaderSuite.fileNames.size, + "Number of files read out does not fit with the actual value.") - for ((filename, contents) <- res) { - val shortName = filename.split('/').last.split('.')(0) + for ((filename, contents) <- res) { + val shortName = filename.split('/').last.split('.')(0) - assert(WholeTextFileRecordReaderSuite.fileNames.contains(shortName), - s"Missing file name $filename.") - assert(contents === new Text(WholeTextFileRecordReaderSuite.files(shortName)).toString, - s"file $filename contents can not match.") + assert(WholeTextFileRecordReaderSuite.fileNames.contains(shortName), + s"Missing file name $filename.") + assert(contents === new Text(WholeTextFileRecordReaderSuite.files(shortName)).toString, + s"file $filename contents can not match.") + } } - - Utils.deleteRecursively(dir) } } diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index 0ec359d1c94f..945b09441ea9 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -470,15 +470,12 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("zero-partition RDD") { - val emptyDir = Utils.createTempDir() - try { + withTempDir { emptyDir => val file = sc.textFile(emptyDir.getAbsolutePath) assert(file.partitions.isEmpty) assert(file.collect().toList === Nil) // Test that a shuffle on the file works, because this used to be a bug assert(file.map(line => (line, 1)).reduceByKey(_ + _).collect().toList === Nil) - } finally { - Utils.deleteRecursively(emptyDir) } } diff --git a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala index a799b1cfb076..5cb2b561d6bc 100644 --- a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala @@ -822,63 +822,66 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { } test("file server") { - val conf = new SparkConf() - val tempDir = Utils.createTempDir() - val file = new File(tempDir, "file") - Files.write(UUID.randomUUID().toString(), file, UTF_8) - val fileWithSpecialChars = new File(tempDir, "file name") - Files.write(UUID.randomUUID().toString(), fileWithSpecialChars, UTF_8) - val empty = new File(tempDir, "empty") - Files.write("", empty, UTF_8); - val jar = new File(tempDir, "jar") - Files.write(UUID.randomUUID().toString(), jar, UTF_8) - - val dir1 = new File(tempDir, "dir1") - assert(dir1.mkdir()) - val subFile1 = new File(dir1, "file1") - Files.write(UUID.randomUUID().toString(), subFile1, UTF_8) - - val dir2 = new File(tempDir, "dir2") - assert(dir2.mkdir()) - val subFile2 = new File(dir2, "file2") - Files.write(UUID.randomUUID().toString(), subFile2, UTF_8) - - val fileUri = env.fileServer.addFile(file) - val fileWithSpecialCharsUri = env.fileServer.addFile(fileWithSpecialChars) - val emptyUri = env.fileServer.addFile(empty) - val jarUri = env.fileServer.addJar(jar) - val dir1Uri = env.fileServer.addDirectory("/dir1", dir1) - val dir2Uri = env.fileServer.addDirectory("/dir2", dir2) - - // Try registering directories with invalid names. 
- Seq("/files", "/jars").foreach { uri => - intercept[IllegalArgumentException] { - env.fileServer.addDirectory(uri, dir1) - } - } + withTempDir { tempDir => + withTempDir { destDir => + val conf = new SparkConf() + + val file = new File(tempDir, "file") + Files.write(UUID.randomUUID().toString(), file, UTF_8) + val fileWithSpecialChars = new File(tempDir, "file name") + Files.write(UUID.randomUUID().toString(), fileWithSpecialChars, UTF_8) + val empty = new File(tempDir, "empty") + Files.write("", empty, UTF_8); + val jar = new File(tempDir, "jar") + Files.write(UUID.randomUUID().toString(), jar, UTF_8) + + val dir1 = new File(tempDir, "dir1") + assert(dir1.mkdir()) + val subFile1 = new File(dir1, "file1") + Files.write(UUID.randomUUID().toString(), subFile1, UTF_8) + + val dir2 = new File(tempDir, "dir2") + assert(dir2.mkdir()) + val subFile2 = new File(dir2, "file2") + Files.write(UUID.randomUUID().toString(), subFile2, UTF_8) + + val fileUri = env.fileServer.addFile(file) + val fileWithSpecialCharsUri = env.fileServer.addFile(fileWithSpecialChars) + val emptyUri = env.fileServer.addFile(empty) + val jarUri = env.fileServer.addJar(jar) + val dir1Uri = env.fileServer.addDirectory("/dir1", dir1) + val dir2Uri = env.fileServer.addDirectory("/dir2", dir2) + + // Try registering directories with invalid names. + Seq("/files", "/jars").foreach { uri => + intercept[IllegalArgumentException] { + env.fileServer.addDirectory(uri, dir1) + } + } - val destDir = Utils.createTempDir() - val sm = new SecurityManager(conf) - val hc = SparkHadoopUtil.get.conf - - val files = Seq( - (file, fileUri), - (fileWithSpecialChars, fileWithSpecialCharsUri), - (empty, emptyUri), - (jar, jarUri), - (subFile1, dir1Uri + "/file1"), - (subFile2, dir2Uri + "/file2")) - files.foreach { case (f, uri) => - val destFile = new File(destDir, f.getName()) - Utils.fetchFile(uri, destDir, conf, sm, hc, 0L, false) - assert(Files.equal(f, destFile)) - } + val sm = new SecurityManager(conf) + val hc = SparkHadoopUtil.get.conf + + val files = Seq( + (file, fileUri), + (fileWithSpecialChars, fileWithSpecialCharsUri), + (empty, emptyUri), + (jar, jarUri), + (subFile1, dir1Uri + "/file1"), + (subFile2, dir2Uri + "/file2")) + files.foreach { case (f, uri) => + val destFile = new File(destDir, f.getName()) + Utils.fetchFile(uri, destDir, conf, sm, hc, 0L, false) + assert(Files.equal(f, destFile)) + } - // Try to download files that do not exist. - Seq("files", "jars", "dir1").foreach { root => - intercept[Exception] { - val uri = env.address.toSparkURL + s"/$root/doesNotExist" - Utils.fetchFile(uri, destDir, conf, sm, hc, 0L, false) + // Try to download files that do not exist. 
+ Seq("files", "jars", "dir1").foreach { root => + intercept[Exception] { + val uri = env.address.toSparkURL + s"/$root/doesNotExist" + Utils.fetchFile(uri, destDir, conf, sm, hc, 0L, false) + } + } } } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 5f4ffa151d19..ed6a3d93b312 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -2831,18 +2831,22 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("SPARK-23207: reliable checkpoint can avoid rollback (checkpointed before)") { - sc.setCheckpointDir(Utils.createTempDir().getCanonicalPath) - val shuffleMapRdd = new MyCheckpointRDD(sc, 2, Nil, indeterminate = true) - shuffleMapRdd.checkpoint() - shuffleMapRdd.doCheckpoint() - assertResultStageNotRollbacked(shuffleMapRdd) + withTempDir { dir => + sc.setCheckpointDir(dir.getCanonicalPath) + val shuffleMapRdd = new MyCheckpointRDD(sc, 2, Nil, indeterminate = true) + shuffleMapRdd.checkpoint() + shuffleMapRdd.doCheckpoint() + assertResultStageNotRollbacked(shuffleMapRdd) + } } test("SPARK-23207: reliable checkpoint fail to rollback (checkpointing now)") { - sc.setCheckpointDir(Utils.createTempDir().getCanonicalPath) - val shuffleMapRdd = new MyCheckpointRDD(sc, 2, Nil, indeterminate = true) - shuffleMapRdd.checkpoint() - assertResultStageFailToRollback(shuffleMapRdd) + withTempDir { dir => + sc.setCheckpointDir(dir.getCanonicalPath) + val shuffleMapRdd = new MyCheckpointRDD(sc, 2, Nil, indeterminate = true) + shuffleMapRdd.checkpoint() + assertResultStageFailToRollback(shuffleMapRdd) + } } /** diff --git a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala index d6ff5bb33055..848f70293553 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala @@ -49,11 +49,8 @@ class OutputCommitCoordinatorIntegrationSuite test("exception thrown in OutputCommitter.commitTask()") { // Regression test for SPARK-10381 failAfter(Span(60, Seconds)) { - val tempDir = Utils.createTempDir() - try { + withTempDir { tempDir => sc.parallelize(1 to 4, 2).map(_.toString).saveAsTextFile(tempDir.getAbsolutePath + "/out") - } finally { - Utils.deleteRecursively(tempDir) } } } diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala index a7eed4b6a8b8..467e49026a02 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala @@ -369,27 +369,27 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { } test("SPARK-12222: deserialize RoaringBitmap throw Buffer underflow exception") { - val dir = Utils.createTempDir() - val tmpfile = dir.toString + "/RoaringBitmap" - val outStream = new FileOutputStream(tmpfile) - val output = new KryoOutput(outStream) - val bitmap = new RoaringBitmap - bitmap.add(1) - bitmap.add(3) - bitmap.add(5) - // Ignore Kryo because it doesn't use writeObject - bitmap.serialize(new KryoOutputObjectOutputBridge(null, output)) - 
output.flush() - output.close() - - val inStream = new FileInputStream(tmpfile) - val input = new KryoInput(inStream) - val ret = new RoaringBitmap - // Ignore Kryo because it doesn't use readObject - ret.deserialize(new KryoInputObjectInputBridge(null, input)) - input.close() - assert(ret == bitmap) - Utils.deleteRecursively(dir) + withTempDir { dir => + val tmpfile = dir.toString + "/RoaringBitmap" + val outStream = new FileOutputStream(tmpfile) + val output = new KryoOutput(outStream) + val bitmap = new RoaringBitmap + bitmap.add(1) + bitmap.add(3) + bitmap.add(5) + // Ignore Kryo because it doesn't use writeObject + bitmap.serialize(new KryoOutputObjectOutputBridge(null, output)) + output.flush() + output.close() + + val inStream = new FileInputStream(tmpfile) + val input = new KryoInput(inStream) + val ret = new RoaringBitmap + // Ignore Kryo because it doesn't use readObject + ret.deserialize(new KryoInputObjectInputBridge(null, input)) + input.close() + assert(ret == bitmap) + } } test("KryoOutputObjectOutputBridge.writeObject and KryoInputObjectInputBridge.readObject") { diff --git a/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala index eec961a49110..959cf58fa053 100644 --- a/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala @@ -132,7 +132,6 @@ class DiskStoreSuite extends SparkFunSuite { } test("block data encryption") { - val testDir = Utils.createTempDir() val testData = new Array[Byte](128 * 1024) new Random().nextBytes(testData) diff --git a/core/src/test/scala/org/apache/spark/util/PeriodicRDDCheckpointerSuite.scala b/core/src/test/scala/org/apache/spark/util/PeriodicRDDCheckpointerSuite.scala index f9e1b791c86e..e48f0014fbbd 100644 --- a/core/src/test/scala/org/apache/spark/util/PeriodicRDDCheckpointerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/PeriodicRDDCheckpointerSuite.scala @@ -50,34 +50,34 @@ class PeriodicRDDCheckpointerSuite extends SparkFunSuite with SharedSparkContext } test("Checkpointing") { - val tempDir = Utils.createTempDir() - val path = tempDir.toURI.toString - val checkpointInterval = 2 - var rddsToCheck = Seq.empty[RDDToCheck] - sc.setCheckpointDir(path) - val rdd1 = createRDD(sc) - val checkpointer = new PeriodicRDDCheckpointer[Double](checkpointInterval, rdd1.sparkContext) - checkpointer.update(rdd1) - rdd1.count() - rddsToCheck = rddsToCheck :+ RDDToCheck(rdd1, 1) - checkCheckpoint(rddsToCheck, 1, checkpointInterval) - - var iteration = 2 - while (iteration < 9) { - val rdd = createRDD(sc) - checkpointer.update(rdd) - rdd.count() - rddsToCheck = rddsToCheck :+ RDDToCheck(rdd, iteration) - checkCheckpoint(rddsToCheck, iteration, checkpointInterval) - iteration += 1 - } + withTempDir { tempDir => + val path = tempDir.toURI.toString + val checkpointInterval = 2 + var rddsToCheck = Seq.empty[RDDToCheck] + sc.setCheckpointDir(path) + val rdd1 = createRDD(sc) + val checkpointer = + new PeriodicRDDCheckpointer[Double](checkpointInterval, rdd1.sparkContext) + checkpointer.update(rdd1) + rdd1.count() + rddsToCheck = rddsToCheck :+ RDDToCheck(rdd1, 1) + checkCheckpoint(rddsToCheck, 1, checkpointInterval) + + var iteration = 2 + while (iteration < 9) { + val rdd = createRDD(sc) + checkpointer.update(rdd) + rdd.count() + rddsToCheck = rddsToCheck :+ RDDToCheck(rdd, iteration) + checkCheckpoint(rddsToCheck, iteration, checkpointInterval) + iteration += 1 + } - 
checkpointer.deleteAllCheckpoints() - rddsToCheck.foreach { rdd => - confirmCheckpointRemoved(rdd.rdd) + checkpointer.deleteAllCheckpoints() + rddsToCheck.foreach { rdd => + confirmCheckpointRemoved(rdd.rdd) + } } - - Utils.deleteRecursively(tempDir) } } diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index f5e912b50d1a..901a724da8a1 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -295,31 +295,30 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { private val workerConf = new SparkConf() def testOffsetBytes(isCompressed: Boolean): Unit = { - val tmpDir2 = Utils.createTempDir() - val suffix = getSuffix(isCompressed) - val f1Path = tmpDir2 + "/f1" + suffix - writeLogFile(f1Path, "1\n2\n3\n4\n5\n6\n7\n8\n9\n".getBytes(StandardCharsets.UTF_8)) - val f1Length = Utils.getFileLength(new File(f1Path), workerConf) + withTempDir { tmpDir2 => + val suffix = getSuffix(isCompressed) + val f1Path = tmpDir2 + "/f1" + suffix + writeLogFile(f1Path, "1\n2\n3\n4\n5\n6\n7\n8\n9\n".getBytes(StandardCharsets.UTF_8)) + val f1Length = Utils.getFileLength(new File(f1Path), workerConf) - // Read first few bytes - assert(Utils.offsetBytes(f1Path, f1Length, 0, 5) === "1\n2\n3") + // Read first few bytes + assert(Utils.offsetBytes(f1Path, f1Length, 0, 5) === "1\n2\n3") - // Read some middle bytes - assert(Utils.offsetBytes(f1Path, f1Length, 4, 11) === "3\n4\n5\n6") + // Read some middle bytes + assert(Utils.offsetBytes(f1Path, f1Length, 4, 11) === "3\n4\n5\n6") - // Read last few bytes - assert(Utils.offsetBytes(f1Path, f1Length, 12, 18) === "7\n8\n9\n") + // Read last few bytes + assert(Utils.offsetBytes(f1Path, f1Length, 12, 18) === "7\n8\n9\n") - // Read some nonexistent bytes in the beginning - assert(Utils.offsetBytes(f1Path, f1Length, -5, 5) === "1\n2\n3") + // Read some nonexistent bytes in the beginning + assert(Utils.offsetBytes(f1Path, f1Length, -5, 5) === "1\n2\n3") - // Read some nonexistent bytes at the end - assert(Utils.offsetBytes(f1Path, f1Length, 12, 22) === "7\n8\n9\n") + // Read some nonexistent bytes at the end + assert(Utils.offsetBytes(f1Path, f1Length, 12, 22) === "7\n8\n9\n") - // Read some nonexistent bytes on both ends - assert(Utils.offsetBytes(f1Path, f1Length, -3, 25) === "1\n2\n3\n4\n5\n6\n7\n8\n9\n") - - Utils.deleteRecursively(tmpDir2) + // Read some nonexistent bytes on both ends + assert(Utils.offsetBytes(f1Path, f1Length, -3, 25) === "1\n2\n3\n4\n5\n6\n7\n8\n9\n") + } } test("reading offset bytes of a file") { @@ -331,41 +330,41 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { } def testOffsetBytesMultipleFiles(isCompressed: Boolean): Unit = { - val tmpDir = Utils.createTempDir() - val suffix = getSuffix(isCompressed) - val files = (1 to 3).map(i => new File(tmpDir, i.toString + suffix)) :+ new File(tmpDir, "4") - writeLogFile(files(0).getAbsolutePath, "0123456789".getBytes(StandardCharsets.UTF_8)) - writeLogFile(files(1).getAbsolutePath, "abcdefghij".getBytes(StandardCharsets.UTF_8)) - writeLogFile(files(2).getAbsolutePath, "ABCDEFGHIJ".getBytes(StandardCharsets.UTF_8)) - writeLogFile(files(3).getAbsolutePath, "9876543210".getBytes(StandardCharsets.UTF_8)) - val fileLengths = files.map(Utils.getFileLength(_, workerConf)) - - // Read first few bytes in the 1st file - assert(Utils.offsetBytes(files, fileLengths, 0, 5) === "01234") + withTempDir 
{ tmpDir => + val suffix = getSuffix(isCompressed) + val files = (1 to 3).map(i => + new File(tmpDir, i.toString + suffix)) :+ new File(tmpDir, "4") + writeLogFile(files(0).getAbsolutePath, "0123456789".getBytes(StandardCharsets.UTF_8)) + writeLogFile(files(1).getAbsolutePath, "abcdefghij".getBytes(StandardCharsets.UTF_8)) + writeLogFile(files(2).getAbsolutePath, "ABCDEFGHIJ".getBytes(StandardCharsets.UTF_8)) + writeLogFile(files(3).getAbsolutePath, "9876543210".getBytes(StandardCharsets.UTF_8)) + val fileLengths = files.map(Utils.getFileLength(_, workerConf)) - // Read bytes within the 1st file - assert(Utils.offsetBytes(files, fileLengths, 5, 8) === "567") + // Read first few bytes in the 1st file + assert(Utils.offsetBytes(files, fileLengths, 0, 5) === "01234") - // Read bytes across 1st and 2nd file - assert(Utils.offsetBytes(files, fileLengths, 8, 18) === "89abcdefgh") + // Read bytes within the 1st file + assert(Utils.offsetBytes(files, fileLengths, 5, 8) === "567") - // Read bytes across 1st, 2nd and 3rd file - assert(Utils.offsetBytes(files, fileLengths, 5, 24) === "56789abcdefghijABCD") + // Read bytes across 1st and 2nd file + assert(Utils.offsetBytes(files, fileLengths, 8, 18) === "89abcdefgh") - // Read bytes across 3rd and 4th file - assert(Utils.offsetBytes(files, fileLengths, 25, 35) === "FGHIJ98765") + // Read bytes across 1st, 2nd and 3rd file + assert(Utils.offsetBytes(files, fileLengths, 5, 24) === "56789abcdefghijABCD") - // Read some nonexistent bytes in the beginning - assert(Utils.offsetBytes(files, fileLengths, -5, 18) === "0123456789abcdefgh") + // Read bytes across 3rd and 4th file + assert(Utils.offsetBytes(files, fileLengths, 25, 35) === "FGHIJ98765") - // Read some nonexistent bytes at the end - assert(Utils.offsetBytes(files, fileLengths, 18, 45) === "ijABCDEFGHIJ9876543210") + // Read some nonexistent bytes in the beginning + assert(Utils.offsetBytes(files, fileLengths, -5, 18) === "0123456789abcdefgh") - // Read some nonexistent bytes on both ends - assert(Utils.offsetBytes(files, fileLengths, -5, 45) === - "0123456789abcdefghijABCDEFGHIJ9876543210") + // Read some nonexistent bytes at the end + assert(Utils.offsetBytes(files, fileLengths, 18, 45) === "ijABCDEFGHIJ9876543210") - Utils.deleteRecursively(tmpDir) + // Read some nonexistent bytes on both ends + assert(Utils.offsetBytes(files, fileLengths, -5, 45) === + "0123456789abcdefghijABCDEFGHIJ9876543210") + } } test("reading offset bytes across multiple files") { @@ -427,27 +426,28 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { test("doesDirectoryContainFilesNewerThan") { // create some temporary directories and files - val parent: File = Utils.createTempDir() - // The parent directory has two child directories - val child1: File = Utils.createTempDir(parent.getCanonicalPath) - val child2: File = Utils.createTempDir(parent.getCanonicalPath) - val child3: File = Utils.createTempDir(child1.getCanonicalPath) - // set the last modified time of child1 to 30 secs old - child1.setLastModified(System.currentTimeMillis() - (1000 * 30)) - - // although child1 is old, child2 is still new so return true - assert(Utils.doesDirectoryContainAnyNewFiles(parent, 5)) - - child2.setLastModified(System.currentTimeMillis - (1000 * 30)) - assert(Utils.doesDirectoryContainAnyNewFiles(parent, 5)) - - parent.setLastModified(System.currentTimeMillis - (1000 * 30)) - // although parent and its immediate children are new, child3 is still old - // we expect a full recursive search for new files. 
- assert(Utils.doesDirectoryContainAnyNewFiles(parent, 5)) - - child3.setLastModified(System.currentTimeMillis - (1000 * 30)) - assert(!Utils.doesDirectoryContainAnyNewFiles(parent, 5)) + withTempDir { parent => + // The parent directory has two child directories + val child1: File = Utils.createTempDir(parent.getCanonicalPath) + val child2: File = Utils.createTempDir(parent.getCanonicalPath) + val child3: File = Utils.createTempDir(child1.getCanonicalPath) + // set the last modified time of child1 to 30 secs old + child1.setLastModified(System.currentTimeMillis() - (1000 * 30)) + + // although child1 is old, child2 is still new so return true + assert(Utils.doesDirectoryContainAnyNewFiles(parent, 5)) + + child2.setLastModified(System.currentTimeMillis - (1000 * 30)) + assert(Utils.doesDirectoryContainAnyNewFiles(parent, 5)) + + parent.setLastModified(System.currentTimeMillis - (1000 * 30)) + // although parent and its immediate children are new, child3 is still old + // we expect a full recursive search for new files. + assert(Utils.doesDirectoryContainAnyNewFiles(parent, 5)) + + child3.setLastModified(System.currentTimeMillis - (1000 * 30)) + assert(!Utils.doesDirectoryContainAnyNewFiles(parent, 5)) + } } test("resolveURI") { @@ -608,9 +608,8 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { } test("loading properties from file") { - val tmpDir = Utils.createTempDir() - val outFile = File.createTempFile("test-load-spark-properties", "test", tmpDir) - try { + withTempDir { tmpDir => + val outFile = File.createTempFile("test-load-spark-properties", "test", tmpDir) System.setProperty("spark.test.fileNameLoadB", "2") Files.write("spark.test.fileNameLoadA true\n" + "spark.test.fileNameLoadB 1\n", outFile, StandardCharsets.UTF_8) @@ -621,8 +620,6 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { val sparkConf = new SparkConf assert(sparkConf.getBoolean("spark.test.fileNameLoadA", false) === true) assert(sparkConf.getInt("spark.test.fileNameLoadB", 1) === 2) - } finally { - Utils.deleteRecursively(tmpDir) } } @@ -638,52 +635,53 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { } test("fetch hcfs dir") { - val tempDir = Utils.createTempDir() - val sourceDir = new File(tempDir, "source-dir") - sourceDir.mkdir() - val innerSourceDir = Utils.createTempDir(root = sourceDir.getPath) - val sourceFile = File.createTempFile("someprefix", "somesuffix", innerSourceDir) - val targetDir = new File(tempDir, "target-dir") - Files.write("some text", sourceFile, StandardCharsets.UTF_8) - - val path = - if (Utils.isWindows) { - new Path("file:/" + sourceDir.getAbsolutePath.replace("\\", "/")) - } else { - new Path("file://" + sourceDir.getAbsolutePath) - } - val conf = new Configuration() - val fs = Utils.getHadoopFileSystem(path.toString, conf) + withTempDir { tempDir => + val sourceDir = new File(tempDir, "source-dir") + sourceDir.mkdir() + val innerSourceDir = Utils.createTempDir(root = sourceDir.getPath) + val sourceFile = File.createTempFile("someprefix", "somesuffix", innerSourceDir) + val targetDir = new File(tempDir, "target-dir") + Files.write("some text", sourceFile, StandardCharsets.UTF_8) + + val path = + if (Utils.isWindows) { + new Path("file:/" + sourceDir.getAbsolutePath.replace("\\", "/")) + } else { + new Path("file://" + sourceDir.getAbsolutePath) + } + val conf = new Configuration() + val fs = Utils.getHadoopFileSystem(path.toString, conf) - assert(!targetDir.isDirectory()) - 
Utils.fetchHcfsFile(path, targetDir, fs, new SparkConf(), conf, false) - assert(targetDir.isDirectory()) + assert(!targetDir.isDirectory()) + Utils.fetchHcfsFile(path, targetDir, fs, new SparkConf(), conf, false) + assert(targetDir.isDirectory()) - // Copy again to make sure it doesn't error if the dir already exists. - Utils.fetchHcfsFile(path, targetDir, fs, new SparkConf(), conf, false) + // Copy again to make sure it doesn't error if the dir already exists. + Utils.fetchHcfsFile(path, targetDir, fs, new SparkConf(), conf, false) - val destDir = new File(targetDir, sourceDir.getName()) - assert(destDir.isDirectory()) + val destDir = new File(targetDir, sourceDir.getName()) + assert(destDir.isDirectory()) - val destInnerDir = new File(destDir, innerSourceDir.getName) - assert(destInnerDir.isDirectory()) + val destInnerDir = new File(destDir, innerSourceDir.getName) + assert(destInnerDir.isDirectory()) - val destInnerFile = new File(destInnerDir, sourceFile.getName) - assert(destInnerFile.isFile()) + val destInnerFile = new File(destInnerDir, sourceFile.getName) + assert(destInnerFile.isFile()) - val filePath = - if (Utils.isWindows) { - new Path("file:/" + sourceFile.getAbsolutePath.replace("\\", "/")) - } else { - new Path("file://" + sourceFile.getAbsolutePath) - } - val testFileDir = new File(tempDir, "test-filename") - val testFileName = "testFName" - val testFilefs = Utils.getHadoopFileSystem(filePath.toString, conf) - Utils.fetchHcfsFile(filePath, testFileDir, testFilefs, new SparkConf(), - conf, false, Some(testFileName)) - val newFileName = new File(testFileDir, testFileName) - assert(newFileName.isFile()) + val filePath = + if (Utils.isWindows) { + new Path("file:/" + sourceFile.getAbsolutePath.replace("\\", "/")) + } else { + new Path("file://" + sourceFile.getAbsolutePath) + } + val testFileDir = new File(tempDir, "test-filename") + val testFileName = "testFName" + val testFilefs = Utils.getHadoopFileSystem(filePath.toString, conf) + Utils.fetchHcfsFile(filePath, testFileDir, testFilefs, new SparkConf(), + conf, false, Some(testFileName)) + val newFileName = new File(testFileDir, testFileName) + assert(newFileName.isFile()) + } } test("shutdown hook manager") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index 23419493e536..85963ec4ca69 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -66,6 +66,17 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with } } + /** + * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` + * returns. + */ + protected override def withTempDir(f: File => Unit): Unit = { + super.withTempDir { dir => + f(dir) + waitForTasksToFinish() + } + } + /** * A helper function for turning off/on codegen. */ @@ -143,43 +154,6 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with test(name) { runOnThread() } } } -} - -/** - * Helper trait that can be extended by all external SQL test suites. - * - * This allows subclasses to plugin a custom `SQLContext`. - * To use implicit methods, import `testImplicits._` instead of through the `SQLContext`. - * - * Subclasses should *not* create `SQLContext`s in the test suite constructor, which is - * prone to leaving multiple overlapping [[org.apache.spark.SparkContext]]s in the same JVM. 
- */ -private[sql] trait SQLTestUtilsBase - extends Eventually - with BeforeAndAfterAll - with SQLTestData - with PlanTestBase { self: Suite => - - protected def sparkContext = spark.sparkContext - - // Shorthand for running a query using our SQLContext - protected lazy val sql = spark.sql _ - - /** - * A helper object for importing SQL implicits. - * - * Note that the alternative of importing `spark.implicits._` is not possible here. - * This is because we create the `SQLContext` immediately before the first test is run, - * but the implicits import is needed in the constructor. - */ - protected object testImplicits extends SQLImplicits { - protected override def _sqlContext: SQLContext = self.spark.sqlContext - } - - protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { - SparkSession.setActiveSession(spark) - super.withSQLConf(pairs: _*)(f) - } /** * Copy file in jar's resource to a temp file, then pass it to `f`. @@ -206,21 +180,6 @@ private[sql] trait SQLTestUtilsBase } } - /** - * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` - * returns. - * - * @todo Probably this method should be moved to a more general place - */ - protected def withTempDir(f: File => Unit): Unit = { - val dir = Utils.createTempDir().getCanonicalFile - try f(dir) finally { - // wait for all tasks to finish before deleting files - waitForTasksToFinish() - Utils.deleteRecursively(dir) - } - } - /** * Creates the specified number of temporary directories, which is then passed to `f` and will be * deleted after `f` returns. @@ -233,6 +192,43 @@ private[sql] trait SQLTestUtilsBase files.foreach(Utils.deleteRecursively) } } +} + +/** + * Helper trait that can be extended by all external SQL test suites. + * + * This allows subclasses to plugin a custom `SQLContext`. + * To use implicit methods, import `testImplicits._` instead of through the `SQLContext`. + * + * Subclasses should *not* create `SQLContext`s in the test suite constructor, which is + * prone to leaving multiple overlapping [[org.apache.spark.SparkContext]]s in the same JVM. + */ +private[sql] trait SQLTestUtilsBase + extends Eventually + with BeforeAndAfterAll + with SQLTestData + with PlanTestBase { self: Suite => + + protected def sparkContext = spark.sparkContext + + // Shorthand for running a query using our SQLContext + protected lazy val sql = spark.sql _ + + /** + * A helper object for importing SQL implicits. + * + * Note that the alternative of importing `spark.implicits._` is not possible here. + * This is because we create the `SQLContext` immediately before the first test is run, + * but the implicits import is needed in the constructor. + */ + protected object testImplicits extends SQLImplicits { + protected override def _sqlContext: SQLContext = self.spark.sqlContext + } + + protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { + SparkSession.setActiveSession(spark) + super.withSQLConf(pairs: _*)(f) + } /** * Drops functions after calling `f`. A function is represented by (functionName, isTemporary). 
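The shared `withTempDir` that these suites now reach through `super.withTempDir` lives in a common base suite (presumably `SparkFunSuite`) that is not part of this excerpt; the per-suite helpers being deleted throughout this patch suggest its shape. A minimal sketch of the loan pattern, with an illustrative trait name:

```scala
import java.io.File

import org.apache.spark.util.Utils

// Minimal sketch of the shared helper the suites in this patch now delegate to;
// the trait name is illustrative and the real definition is outside this diff.
trait TempDirLoan {
  protected def withTempDir(f: File => Unit): Unit = {
    val dir = Utils.createTempDir().getCanonicalFile
    try f(dir) finally Utils.deleteRecursively(dir)
  }
}
```

The motivation is visible in the deleted hunks: the old tests created temporary directories by hand and, at best, deleted them at the end of the happy path, so a failing assertion could leak the directory. Routing cleanup through a `finally` block in one shared helper removes that class of leak and the copy-pasted boilerplate at once.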
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index dc96ec416afd..218bd18e5dc9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -55,15 +55,6 @@ class VersionsSuite extends SparkFunSuite with Logging { import HiveClientBuilder.buildClient - /** - * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` - * returns. - */ - protected def withTempDir(f: File => Unit): Unit = { - val dir = Utils.createTempDir().getCanonicalFile - try f(dir) finally Utils.deleteRecursively(dir) - } - /** * Drops table `tableName` after calling `f`. */ diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala index ada494eb897f..6a0f523e4b49 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -557,16 +557,4 @@ trait TestSuiteBase extends SparkFunSuite with BeforeAndAfter with Logging { verifyOutput[W](output.toSeq, expectedOutput, useSet) } } - - /** - * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` - * returns. - * (originally from `SqlTestUtils`.) - * @todo Probably this method should be moved to a more general place - */ - protected def withTempDir(f: File => Unit): Unit = { - val dir = Utils.createTempDir().getCanonicalFile - try f(dir) finally Utils.deleteRecursively(dir) - } - } From 1abfbda7eb9bd855d70ba64fc137ecc101e1d8b0 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Sat, 1 Dec 2018 07:06:18 -0600 Subject: [PATCH 0039/1072] [SPARK-26212][BUILD][TEST-MAVEN] Upgrade maven version to 3.6.0 ## What changes were proposed in this pull request? This PR updates maven version from 3.5.4 to 3.6.0. The release note of the 3.6.0 is [here](https://maven.apache.org/docs/3.6.0/release-notes.html). From [the release note of the 3.6.0](https://maven.apache.org/docs/3.6.0/release-notes.html), the followings are new features: 1. There had been issues related to the project discoverytime which has been increased in previous version which influenced some of our users. 1. The output in the reactor summary has been improved. 1. There was an issue related to the classpath ordering. ## How was this patch tested? Existing tests Closes #23177 from kiszk/SPARK-26212. 
Authored-by: Kazuaki Ishizaki Signed-off-by: Sean Owen --- dev/appveyor-install-dependencies.ps1 | 2 +- docs/building-spark.md | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1 index cc68ffb90d87..7c7bdd623477 100644 --- a/dev/appveyor-install-dependencies.ps1 +++ b/dev/appveyor-install-dependencies.ps1 @@ -81,7 +81,7 @@ if (!(Test-Path $tools)) { # ========================== Maven Push-Location $tools -$mavenVer = "3.5.4" +$mavenVer = "3.6.0" Start-FileDownload "https://archive.apache.org/dist/maven/maven-3/$mavenVer/binaries/apache-maven-$mavenVer-bin.zip" "maven.zip" # extract diff --git a/docs/building-spark.md b/docs/building-spark.md index dfcd53c48e85..55695f35931c 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -12,7 +12,7 @@ redirect_from: "building-with-maven.html" ## Apache Maven The Maven-based build is the build of reference for Apache Spark. -Building Spark using Maven requires Maven 3.5.4 and Java 8. +Building Spark using Maven requires Maven 3.6.0 and Java 8. Note that support for Java 7 was removed as of Spark 2.2.0. ### Setting up Maven's Memory Usage diff --git a/pom.xml b/pom.xml index dfc3c540dc18..61321a145070 100644 --- a/pom.xml +++ b/pom.xml @@ -114,7 +114,7 @@ 1.8 ${java.version} ${java.version} - 3.5.4 + 3.6.0 spark 1.7.16 1.2.17 From 60e4239a1e3506d342099981b6e3b3b8431a203e Mon Sep 17 00:00:00 2001 From: liuxian Date: Sat, 1 Dec 2018 07:11:31 -0600 Subject: [PATCH 0040/1072] [MINOR][DOC] Correct some document description errors ## What changes were proposed in this pull request? Correct some document description errors. ## How was this patch tested? N/A Closes #23162 from 10110346/docerror. Authored-by: liuxian Signed-off-by: Sean Owen --- .../org/apache/spark/internal/config/package.scala | 10 +++++----- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 9cc48f637500..646b3881a79b 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -281,7 +281,7 @@ package object config { private[spark] val LISTENER_BUS_EVENT_QUEUE_CAPACITY = ConfigBuilder("spark.scheduler.listenerbus.eventqueue.capacity") .intConf - .checkValue(_ > 0, "The capacity of listener bus event queue must not be negative") + .checkValue(_ > 0, "The capacity of listener bus event queue must be positive") .createWithDefault(10000) private[spark] val LISTENER_BUS_METRICS_MAX_LISTENER_CLASSES_TIMED = @@ -430,8 +430,8 @@ package object config { .doc("The chunk size in bytes during writing out the bytes of ChunkedByteBuffer.") .bytesConf(ByteUnit.BYTE) .checkValue(_ <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH, - "The chunk size during writing out the bytes of" + - " ChunkedByteBuffer should not larger than Int.MaxValue - 15.") + "The chunk size during writing out the bytes of ChunkedByteBuffer should" + + s" be less than or equal to ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.") .createWithDefault(64 * 1024 * 1024) private[spark] val CHECKPOINT_COMPRESS = @@ -503,7 +503,7 @@ package object config { "made in creating intermediate shuffle files.") .bytesConf(ByteUnit.KiB) .checkValue(v => v > 0 && v <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 1024, - s"The 
file buffer size must be greater than 0 and less than" + + s"The file buffer size must be positive and less than or equal to" + s" ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 1024}.") .createWithDefaultString("32k") @@ -513,7 +513,7 @@ package object config { "is written in unsafe shuffle writer. In KiB unless otherwise specified.") .bytesConf(ByteUnit.KiB) .checkValue(v => v > 0 && v <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 1024, - s"The buffer size must be greater than 0 and less than" + + s"The buffer size must be positive and less than or equal to" + s" ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 1024}.") .createWithDefaultString("32k") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index f1c845bc9450..c4f00d723c25 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -326,7 +326,7 @@ object SQLConf { "factor as the estimated data size, in case the data is compressed in the file and lead to" + " a heavily underestimated result.") .doubleConf - .checkValue(_ > 0, "the value of fileDataSizeFactor must be larger than 0") + .checkValue(_ > 0, "the value of fileDataSizeFactor must be greater than 0") .createWithDefault(1.0) val PARQUET_SCHEMA_MERGING_ENABLED = buildConf("spark.sql.parquet.mergeSchema") @@ -673,7 +673,7 @@ object SQLConf { val BUCKETING_MAX_BUCKETS = buildConf("spark.sql.sources.bucketing.maxBuckets") .doc("The maximum number of buckets allowed. Defaults to 100000") .intConf - .checkValue(_ > 0, "the value of spark.sql.sources.bucketing.maxBuckets must be larger than 0") + .checkValue(_ > 0, "the value of spark.sql.sources.bucketing.maxBuckets must be greater than 0") .createWithDefault(100000) val CROSS_JOINS_ENABLED = buildConf("spark.sql.crossJoin.enabled") @@ -1154,7 +1154,7 @@ object SQLConf { .internal() .doc("The number of bins when generating histograms.") .intConf - .checkValue(num => num > 1, "The number of bins must be larger than 1.") + .checkValue(num => num > 1, "The number of bins must be greater than 1.") .createWithDefault(254) val PERCENTILE_ACCURACY = From 55c96858107739dd768abea1dff88bd970e47e9f Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sat, 1 Dec 2018 16:22:38 -0800 Subject: [PATCH 0041/1072] [SPARK-26226][SQL] Track optimization phase for streaming queries ## What changes were proposed in this pull request? In an earlier PR, we missed measuring the optimization phase time for streaming queries. This patch adds it. ## How was this patch tested? Given this is a debugging feature, and it is very convoluted to add tests to verify the phase is set properly, I am not introducing a streaming specific test. Closes #23193 from rxin/SPARK-26226-1. 
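For context on what `measurePhase` buys here: the tracker accumulates wall-clock time per query-planning phase (analysis, optimization, and so on) so the numbers can be surfaced when debugging. A toy sketch of that bookkeeping — not the actual `QueryPlanningTracker` API, whose details are outside this diff:

```scala
import scala.collection.mutable

// Toy per-phase timer illustrating the idea behind tracker.measurePhase(...);
// the real QueryPlanningTracker (phase constants, reporting) is not shown here.
class PhaseTimer {
  private val durationsNs = mutable.Map.empty[String, Long].withDefaultValue(0L)

  def measurePhase[T](phase: String)(body: => T): T = {
    val start = System.nanoTime()
    val result = body          // run the wrapped planning step
    durationsNs(phase) += System.nanoTime() - start
    result
  }

  def report(): Map[String, Long] = durationsNs.toMap
}
```

Wrapping the streaming `optimizedPlan` in `tracker.measurePhase(QueryPlanningTracker.OPTIMIZATION)` simply brings `IncrementalExecution` in line with the batch path, whose optimization time was already being recorded.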
Authored-by: Reynold Xin Signed-off-by: gatorsmile --- .../spark/sql/execution/streaming/IncrementalExecution.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala index fad287e28877..a73e88c19ba9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala @@ -22,6 +22,7 @@ import java.util.concurrent.atomic.AtomicInteger import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} +import org.apache.spark.sql.catalyst.QueryPlanningTracker import org.apache.spark.sql.catalyst.expressions.{CurrentBatchTimestamp, ExpressionWithRandomSeed} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, HashPartitioning, SinglePartition} @@ -73,7 +74,8 @@ class IncrementalExecution( * Walk the optimized logical plan and replace CurrentBatchTimestamp * with the desired literal */ - override lazy val optimizedPlan: LogicalPlan = { + override + lazy val optimizedPlan: LogicalPlan = tracker.measurePhase(QueryPlanningTracker.OPTIMIZATION) { sparkSession.sessionState.optimizer.execute(withCachedData) transformAllExpressions { case ts @ CurrentBatchTimestamp(timestamp, _, _) => logInfo(s"Current batch timestamp = $timestamp") From cbb9bb96d292d6e738f2f33637fb1c9715b167ac Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sat, 1 Dec 2018 16:24:06 -0800 Subject: [PATCH 0042/1072] [SPARK-26241][SQL] Add queryId to IncrementalExecution ## What changes were proposed in this pull request? This is a small change for better debugging: to pass query uuid in IncrementalExecution, when we look at the QueryExecution in isolation to trace back the query. ## How was this patch tested? N/A - just add some field for better debugging. Closes #23192 from rxin/SPARK-26241. Authored-by: Reynold Xin Signed-off-by: gatorsmile --- .../scala/org/apache/spark/sql/execution/command/commands.scala | 2 +- .../spark/sql/execution/streaming/IncrementalExecution.scala | 1 + .../spark/sql/execution/streaming/MicroBatchExecution.scala | 1 + .../execution/streaming/continuous/ContinuousExecution.scala | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index ab40936eb3cc..754a3316ffb7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -154,7 +154,7 @@ case class ExplainCommand( // output mode does not matter since there is no `Sink`. 
new IncrementalExecution( sparkSession, logicalPlan, OutputMode.Append(), "", - UUID.randomUUID, 0, OffsetSeqMetadata(0, 0)) + UUID.randomUUID, UUID.randomUUID, 0, OffsetSeqMetadata(0, 0)) } else { sparkSession.sessionState.executePlan(logicalPlan) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala index a73e88c19ba9..af52af0d1d7e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala @@ -42,6 +42,7 @@ class IncrementalExecution( logicalPlan: LogicalPlan, val outputMode: OutputMode, val checkpointLocation: String, + val queryId: UUID, val runId: UUID, val currentBatchId: Long, val offsetSeqMetadata: OffsetSeqMetadata) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 5defca391a35..64e09edf27f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -521,6 +521,7 @@ class MicroBatchExecution( triggerLogicalPlan, outputMode, checkpointFile("state"), + id, runId, currentBatchId, offsetSeqMetadata) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index af23c5cd3d80..4d42428fd189 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -199,6 +199,7 @@ class ContinuousExecution( withSink, outputMode, checkpointFile("state"), + id, runId, currentBatchId, offsetSeqMetadata) From 17fdca7c1bab94e6e54b25807344b06a78780cf6 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sun, 2 Dec 2018 10:22:22 +0800 Subject: [PATCH 0043/1072] [SPARK-26211][SQL][TEST][FOLLOW-UP] Combine test cases for `In` and `InSet`. ## What changes were proposed in this pull request? This is a follow pr of #23176. `In` and `InSet` are semantically equal, so the tests for `In` should pass with `InSet`, and vice versa. This combines those test cases. ## How was this patch tested? The combined tests and existing tests. Closes #23187 from ueshin/issues/SPARK-26211/in_inset_tests. 
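The equivalence the new `checkInAndInSet` helper leans on, spelled out: `In` keeps its candidates as child expressions, while `InSet` keeps their already-evaluated values, so for a list of literals the two predicates must agree. A small illustration mirroring the helper added in the diff below (the object name is only for packaging the snippet):

```scala
import scala.collection.immutable.HashSet

import org.apache.spark.sql.catalyst.expressions.{In, InSet, Literal}

object InVsInSet {
  // `In` holds a list of child expressions; `InSet` holds their evaluated values.
  val in: In = In(Literal(2), Seq(Literal(1), Literal(2)))
  val inSet: InSet = InSet(in.value, HashSet() ++ in.list.map(_.eval()))
  // For a foldable list like this one, both predicates evaluate to the same
  // result, which is exactly what checkInAndInSet asserts for every test case.
}
```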
Authored-by: Takuya UESHIN Signed-off-by: Wenchen Fan --- .../catalyst/expressions/PredicateSuite.scala | 160 ++++++++---------- 1 file changed, 66 insertions(+), 94 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 3b60d1d88b3c..0f63717f9daf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -124,34 +124,43 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (null, false, null) :: (null, null, null) :: Nil) - test("basic IN predicate test") { - checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq(Literal(1), + private def checkInAndInSet(in: In, expected: Any): Unit = { + // expecting all in.list are Literal or NonFoldableLiteral. + checkEvaluation(in, expected) + checkEvaluation(InSet(in.value, HashSet() ++ in.list.map(_.eval())), expected) + } + + test("basic IN/INSET predicate test") { + checkInAndInSet(In(NonFoldableLiteral.create(null, IntegerType), Seq(Literal(1), Literal(2))), null) - checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), + checkInAndInSet(In(NonFoldableLiteral.create(null, IntegerType), Seq(NonFoldableLiteral.create(null, IntegerType))), null) - checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq.empty), null) - checkEvaluation(In(Literal(1), Seq.empty), false) - checkEvaluation(In(Literal(1), Seq(NonFoldableLiteral.create(null, IntegerType))), null) - checkEvaluation(In(Literal(1), Seq(Literal(1), NonFoldableLiteral.create(null, IntegerType))), + checkInAndInSet(In(NonFoldableLiteral.create(null, IntegerType), Seq.empty), null) + checkInAndInSet(In(Literal(1), Seq.empty), false) + checkInAndInSet(In(Literal(1), Seq(NonFoldableLiteral.create(null, IntegerType))), null) + checkInAndInSet(In(Literal(1), Seq(Literal(1), NonFoldableLiteral.create(null, IntegerType))), true) - checkEvaluation(In(Literal(2), Seq(Literal(1), NonFoldableLiteral.create(null, IntegerType))), + checkInAndInSet(In(Literal(2), Seq(Literal(1), NonFoldableLiteral.create(null, IntegerType))), null) - checkEvaluation(In(Literal(1), Seq(Literal(1), Literal(2))), true) - checkEvaluation(In(Literal(2), Seq(Literal(1), Literal(2))), true) - checkEvaluation(In(Literal(3), Seq(Literal(1), Literal(2))), false) + checkInAndInSet(In(Literal(1), Seq(Literal(1), Literal(2))), true) + checkInAndInSet(In(Literal(2), Seq(Literal(1), Literal(2))), true) + checkInAndInSet(In(Literal(3), Seq(Literal(1), Literal(2))), false) + checkEvaluation( And(In(Literal(1), Seq(Literal(1), Literal(2))), In(Literal(2), Seq(Literal(1), Literal(2)))), true) + checkEvaluation( + And(InSet(Literal(1), HashSet(1, 2)), InSet(Literal(2), Set(1, 2))), + true) val ns = NonFoldableLiteral.create(null, StringType) - checkEvaluation(In(ns, Seq(Literal("1"), Literal("2"))), null) - checkEvaluation(In(ns, Seq(ns)), null) - checkEvaluation(In(Literal("a"), Seq(ns)), null) - checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("^Ba*n"), ns)), true) - checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^Ba*n"))), true) - checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^n"))), false) - + checkInAndInSet(In(ns, Seq(Literal("1"), Literal("2"))), null) + checkInAndInSet(In(ns, Seq(ns)), null) + checkInAndInSet(In(Literal("a"), Seq(ns)), null) + 
checkInAndInSet(In(Literal("^Ba*n"), Seq(Literal("^Ba*n"), ns)), true) + checkInAndInSet(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^Ba*n"))), true) + checkInAndInSet(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^n"))), false) } test("IN with different types") { @@ -187,11 +196,12 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { } else { false } - checkEvaluation(In(input(0), input.slice(1, 10)), expected) + checkInAndInSet(In(input(0), input.slice(1, 10)), expected) } val atomicTypes = DataTypeTestUtils.atomicTypes.filter { t => - RandomDataGenerator.forType(t).isDefined && !t.isInstanceOf[DecimalType] + RandomDataGenerator.forType(t).isDefined && + !t.isInstanceOf[DecimalType] && !t.isInstanceOf[BinaryType] } ++ Seq(DecimalType.USER_DEFAULT) val atomicArrayTypes = atomicTypes.map(ArrayType(_, containsNull = true)) @@ -252,93 +262,55 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { assert(ctx.inlinedMutableStates.isEmpty) } - test("INSET") { - val hS = HashSet[Any]() + 1 + 2 - val nS = HashSet[Any]() + 1 + 2 + null - val one = Literal(1) - val two = Literal(2) - val three = Literal(3) - val nl = Literal(null) - checkEvaluation(InSet(one, hS), true) - checkEvaluation(InSet(two, hS), true) - checkEvaluation(InSet(two, nS), true) - checkEvaluation(InSet(three, hS), false) - checkEvaluation(InSet(three, nS), null) - checkEvaluation(InSet(nl, hS), null) - checkEvaluation(InSet(nl, nS), null) - - val primitiveTypes = Seq(IntegerType, FloatType, DoubleType, StringType, ByteType, ShortType, - LongType, BooleanType, DecimalType.USER_DEFAULT, TimestampType) - primitiveTypes.foreach { t => - val dataGen = RandomDataGenerator.forType(t, nullable = true).get - val inputData = Seq.fill(10) { - val value = dataGen.apply() - value match { - case d: Double if d.isNaN => 0.0d - case f: Float if f.isNaN => 0.0f - case _ => value - } - } - val input = inputData.map(Literal(_)) - val expected = if (inputData(0) == null) { - null - } else if (inputData.slice(1, 10).contains(inputData(0))) { - true - } else if (inputData.slice(1, 10).contains(null)) { - null - } else { - false - } - checkEvaluation(InSet(input(0), inputData.slice(1, 10).toSet), expected) - } - } - - test("INSET: binary") { - val hS = HashSet[Any]() + Array(1.toByte, 2.toByte) + Array(3.toByte) - val nS = HashSet[Any]() + Array(1.toByte, 2.toByte) + Array(3.toByte) + null + test("IN/INSET: binary") { val onetwo = Literal(Array(1.toByte, 2.toByte)) val three = Literal(Array(3.toByte)) val threefour = Literal(Array(3.toByte, 4.toByte)) - val nl = Literal(null, onetwo.dataType) - checkEvaluation(InSet(onetwo, hS), true) - checkEvaluation(InSet(three, hS), true) - checkEvaluation(InSet(three, nS), true) - checkEvaluation(InSet(threefour, hS), false) - checkEvaluation(InSet(threefour, nS), null) - checkEvaluation(InSet(nl, hS), null) - checkEvaluation(InSet(nl, nS), null) + val nl = NonFoldableLiteral.create(null, onetwo.dataType) + val hS = Seq(Literal(Array(1.toByte, 2.toByte)), Literal(Array(3.toByte))) + val nS = Seq(Literal(Array(1.toByte, 2.toByte)), Literal(Array(3.toByte)), + NonFoldableLiteral.create(null, onetwo.dataType)) + checkInAndInSet(In(onetwo, hS), true) + checkInAndInSet(In(three, hS), true) + checkInAndInSet(In(three, nS), true) + checkInAndInSet(In(threefour, hS), false) + checkInAndInSet(In(threefour, nS), null) + checkInAndInSet(In(nl, hS), null) + checkInAndInSet(In(nl, nS), null) } - test("INSET: struct") { - val hS = HashSet[Any]() + Literal.create((1, "a")).value + 
Literal.create((2, "b")).value - val nS = HashSet[Any]() + Literal.create((1, "a")).value + Literal.create((2, "b")).value + null + test("IN/INSET: struct") { val oneA = Literal.create((1, "a")) val twoB = Literal.create((2, "b")) val twoC = Literal.create((2, "c")) - val nl = Literal(null, oneA.dataType) - checkEvaluation(InSet(oneA, hS), true) - checkEvaluation(InSet(twoB, hS), true) - checkEvaluation(InSet(twoB, nS), true) - checkEvaluation(InSet(twoC, hS), false) - checkEvaluation(InSet(twoC, nS), null) - checkEvaluation(InSet(nl, hS), null) - checkEvaluation(InSet(nl, nS), null) + val nl = NonFoldableLiteral.create(null, oneA.dataType) + val hS = Seq(Literal.create((1, "a")), Literal.create((2, "b"))) + val nS = Seq(Literal.create((1, "a")), Literal.create((2, "b")), + NonFoldableLiteral.create(null, oneA.dataType)) + checkInAndInSet(In(oneA, hS), true) + checkInAndInSet(In(twoB, hS), true) + checkInAndInSet(In(twoB, nS), true) + checkInAndInSet(In(twoC, hS), false) + checkInAndInSet(In(twoC, nS), null) + checkInAndInSet(In(nl, hS), null) + checkInAndInSet(In(nl, nS), null) } - test("INSET: array") { - val hS = HashSet[Any]() + Literal.create(Seq(1, 2)).value + Literal.create(Seq(3)).value - val nS = HashSet[Any]() + Literal.create(Seq(1, 2)).value + Literal.create(Seq(3)).value + null + test("IN/INSET: array") { val onetwo = Literal.create(Seq(1, 2)) val three = Literal.create(Seq(3)) val threefour = Literal.create(Seq(3, 4)) - val nl = Literal(null, onetwo.dataType) - checkEvaluation(InSet(onetwo, hS), true) - checkEvaluation(InSet(three, hS), true) - checkEvaluation(InSet(three, nS), true) - checkEvaluation(InSet(threefour, hS), false) - checkEvaluation(InSet(threefour, nS), null) - checkEvaluation(InSet(nl, hS), null) - checkEvaluation(InSet(nl, nS), null) + val nl = NonFoldableLiteral.create(null, onetwo.dataType) + val hS = Seq(Literal.create(Seq(1, 2)), Literal.create(Seq(3))) + val nS = Seq(Literal.create(Seq(1, 2)), Literal.create(Seq(3)), + NonFoldableLiteral.create(null, onetwo.dataType)) + checkInAndInSet(In(onetwo, hS), true) + checkInAndInSet(In(three, hS), true) + checkInAndInSet(In(three, nS), true) + checkInAndInSet(In(threefour, hS), false) + checkInAndInSet(In(threefour, nS), null) + checkInAndInSet(In(nl, hS), null) + checkInAndInSet(In(nl, nS), null) } private case class MyStruct(a: Long, b: String) From 3e46e3ccd58d0a2d445dff58a52ab1966ce133e8 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 2 Dec 2018 10:29:25 +0800 Subject: [PATCH 0044/1072] [SPARK-26161][SQL] Ignore empty files in load ## What changes were proposed in this pull request? In the PR, I propose filtering out all empty files inside of `FileSourceScanExec` and exclude them from file splits. It should reduce overhead of opening and reading files without any data, and as consequence datasources will not produce empty partitions for such files. ## How was this patch tested? Added a test which creates an empty and non-empty files. If empty files are ignored in load, Text datasource in the `wholetext` mode must create only one partition for non-empty file. Closes #23130 from MaxGekk/ignore-empty-files. 
Authored-by: Maxim Gekk Signed-off-by: Wenchen Fan --- .../spark/sql/execution/DataSourceScanExec.scala | 4 ++-- .../sql/execution/datasources/json/JsonSuite.scala | 3 +-- .../apache/spark/sql/sources/SaveLoadSuite.scala | 13 +++++++++++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 4faa27c2c1e2..b29d5c76c5f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -368,7 +368,7 @@ case class FileSourceScanExec( logInfo(s"Planning with ${bucketSpec.numBuckets} buckets") val filesGroupedToBuckets = selectedPartitions.flatMap { p => - p.files.map { f => + p.files.filter(_.getLen > 0).map { f => val hosts = getBlockHosts(getBlockLocations(f), 0, f.getLen) PartitionedFile(p.values, f.getPath.toUri.toString, 0, f.getLen, hosts) } @@ -418,7 +418,7 @@ case class FileSourceScanExec( s"open cost is considered as scanning $openCostInBytes bytes.") val splitFiles = selectedPartitions.flatMap { partition => - partition.files.flatMap { file => + partition.files.filter(_.getLen > 0).flatMap { file => val blockLocations = getBlockLocations(file) if (fsRelation.fileFormat.isSplitable( fsRelation.sparkSession, fsRelation.options, file.getPath)) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index ee5176e23e34..9d23161c1f24 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1842,7 +1842,6 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { val path = dir.getCanonicalPath primitiveFieldAndType .toDF("value") - .repartition(1) .write .text(path) @@ -1910,7 +1909,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { F.count($"dummy").as("valid"), F.count($"_corrupt_record").as("corrupt"), F.count("*").as("count")) - checkAnswer(counts, Row(1, 4, 6)) // null row for empty file + checkAnswer(counts, Row(1, 4, 6)) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala index 12779b46bfe8..048e4b80c72a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.sources import java.io.File +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Paths} import org.scalatest.BeforeAndAfter @@ -142,4 +144,15 @@ class SaveLoadSuite extends DataSourceTest with SharedSQLContext with BeforeAndA assert(e.contains(s"Partition column `$unknown` not found in schema $schemaCatalog")) } } + + test("skip empty files in non bucketed read") { + withTempDir { dir => + val path = dir.getCanonicalPath + Files.write(Paths.get(path, "empty"), Array.empty[Byte]) + Files.write(Paths.get(path, "notEmpty"), "a".getBytes(StandardCharsets.UTF_8)) + val readback = spark.read.option("wholetext", true).text(path) + + assert(readback.rdd.getNumPartitions === 1) + } + } } From 39617cb2c0c433494e9f17fcd4e49c6300a9c4b0 Mon Sep 17 
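Returning to the `UserDefinedFunction` change itself: the interface-plus-private-implementation split described above is a general Scala technique for shrinking a binary surface — only the methods declared on the trait stay public, while the case-class conveniences (`copy`, field accessors, the synthesized companion) remain hidden. A generic sketch of the pattern, with names that are deliberately not Spark's:

```scala
package example

// Only the sealed trait is public API; the case class is package-private, so its
// copy method, public fields and companion are not part of the binary surface.
sealed trait Counter {
  def value: Int
  def increment(by: Int): Counter
}

private[example] case class SimpleCounter(value: Int) extends Counter {
  override def increment(by: Int): Counter = copy(value = value + by)
}

object Counter {
  // The public entry point constructs the hidden implementation.
  def apply(start: Int = 0): Counter = SimpleCounter(start)
}
```

Turning an existing public class into a trait is source compatible but not binary compatible, which is why the patch both adds MiMa exclusions and calls the recompilation requirement out in the migration guide.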
00:00:00 2001 From: Wenchen Fan Date: Sun, 2 Dec 2018 10:46:17 +0800 Subject: [PATCH 0045/1072] [SPARK-26216][SQL] Do not use case class as public API (UserDefinedFunction) ## What changes were proposed in this pull request? It's a bad idea to use case class as public API, as it has a very wide surface. For example, the `copy` method, its fields, the companion object, etc. For a particular case, `UserDefinedFunction`. It has a private constructor, and I believe we only want users to access a few methods:`apply`, `nullable`, `asNonNullable`, etc. However, all its fields, and `copy` method, and the companion object are public unexpectedly. As a result, we made many tricks to work around the binary compatibility issues. This PR proposes to only make interfaces public, and hide implementations behind with a private class. Now `UserDefinedFunction` is a pure trait, and the concrete implementation is `SparkUserDefinedFunction`, which is private. Changing class to interface is not binary compatible(but source compatible), so 3.0 is a good chance to do it. This is the first PR to go with this direction. If it's accepted, I'll create a umbrella JIRA and fix all the public case classes. ## How was this patch tested? existing tests. Closes #23178 from cloud-fan/udf. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- docs/sql-migration-guide-upgrade.md | 2 + project/MimaExcludes.scala | 6 +- .../sql/expressions/UserDefinedFunction.scala | 119 ++++++++---------- .../org/apache/spark/sql/functions.scala | 2 +- 4 files changed, 63 insertions(+), 66 deletions(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index e48125a0972b..787f4bcbbea8 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -31,6 +31,8 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, the `SET` command works without any warnings even if the specified key is for `SparkConf` entries and it has no effect because the command does not update `SparkConf`, but the behavior might confuse users. Since 3.0, the command fails if a `SparkConf` key is used. You can disable such a check by setting `spark.sql.legacy.execution.setCommandRejectsSparkConfs` to `false`. + - Spark applications which are built with Spark version 2.4 and prior, and call methods of `UserDefinedFunction`, need to be re-compiled with Spark 3.0, as they are not binary compatible with Spark 3.0. + ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some change in behavior and are illustrated in the table below. 
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index fcef424c330f..1c83cf5860c5 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -227,7 +227,11 @@ object MimaExcludes { case ReversedMissingMethodProblem(meth) => !meth.owner.fullName.startsWith("org.apache.spark.sql.sources.v2") case _ => true - } + }, + + // [SPARK-26216][SQL] Do not use case class as public API (UserDefinedFunction) + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.expressions.UserDefinedFunction$"), + ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.sql.expressions.UserDefinedFunction") ) // Exclude rules for 2.4.x diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index 58a942afe28c..f88e0e0f299d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -38,25 +38,14 @@ import org.apache.spark.sql.types.DataType * @since 1.3.0 */ @Stable -case class UserDefinedFunction protected[sql] ( - f: AnyRef, - dataType: DataType, - inputTypes: Option[Seq[DataType]]) { - - private var _nameOption: Option[String] = None - private var _nullable: Boolean = true - private var _deterministic: Boolean = true - - // This is a `var` instead of in the constructor for backward compatibility of this case class. - // TODO: revisit this case class in Spark 3.0, and narrow down the public surface. - private[sql] var nullableTypes: Option[Seq[Boolean]] = None +sealed trait UserDefinedFunction { /** * Returns true when the UDF can return a nullable value. * * @since 2.3.0 */ - def nullable: Boolean = _nullable + def nullable: Boolean /** * Returns true iff the UDF is deterministic, i.e. the UDF produces the same output given the same @@ -64,7 +53,7 @@ case class UserDefinedFunction protected[sql] ( * * @since 2.3.0 */ - def deterministic: Boolean = _deterministic + def deterministic: Boolean /** * Returns an expression that invokes the UDF, using the given arguments. @@ -72,80 +61,83 @@ case class UserDefinedFunction protected[sql] ( * @since 1.3.0 */ @scala.annotation.varargs - def apply(exprs: Column*): Column = { + def apply(exprs: Column*): Column + + /** + * Updates UserDefinedFunction with a given name. + * + * @since 2.3.0 + */ + def withName(name: String): UserDefinedFunction + + /** + * Updates UserDefinedFunction to non-nullable. + * + * @since 2.3.0 + */ + def asNonNullable(): UserDefinedFunction + + /** + * Updates UserDefinedFunction to nondeterministic. + * + * @since 2.3.0 + */ + def asNondeterministic(): UserDefinedFunction +} + +private[sql] case class SparkUserDefinedFunction( + f: AnyRef, + dataType: DataType, + inputTypes: Option[Seq[DataType]], + nullableTypes: Option[Seq[Boolean]], + name: Option[String] = None, + nullable: Boolean = true, + deterministic: Boolean = true) extends UserDefinedFunction { + + @scala.annotation.varargs + override def apply(exprs: Column*): Column = { // TODO: make sure this class is only instantiated through `SparkUserDefinedFunction.create()` // and `nullableTypes` is always set. 
- if (nullableTypes.isEmpty) { - nullableTypes = Some(ScalaReflection.getParameterTypeNullability(f)) - } if (inputTypes.isDefined) { assert(inputTypes.get.length == nullableTypes.get.length) } + val inputsNullSafe = nullableTypes.getOrElse { + ScalaReflection.getParameterTypeNullability(f) + } + Column(ScalaUDF( f, dataType, exprs.map(_.expr), - nullableTypes.get, + inputsNullSafe, inputTypes.getOrElse(Nil), - udfName = _nameOption, - nullable = _nullable, - udfDeterministic = _deterministic)) - } - - private def copyAll(): UserDefinedFunction = { - val udf = copy() - udf._nameOption = _nameOption - udf._nullable = _nullable - udf._deterministic = _deterministic - udf.nullableTypes = nullableTypes - udf + udfName = name, + nullable = nullable, + udfDeterministic = deterministic)) } - /** - * Updates UserDefinedFunction with a given name. - * - * @since 2.3.0 - */ - def withName(name: String): UserDefinedFunction = { - val udf = copyAll() - udf._nameOption = Option(name) - udf + override def withName(name: String): UserDefinedFunction = { + copy(name = Option(name)) } - /** - * Updates UserDefinedFunction to non-nullable. - * - * @since 2.3.0 - */ - def asNonNullable(): UserDefinedFunction = { + override def asNonNullable(): UserDefinedFunction = { if (!nullable) { this } else { - val udf = copyAll() - udf._nullable = false - udf + copy(nullable = false) } } - /** - * Updates UserDefinedFunction to nondeterministic. - * - * @since 2.3.0 - */ - def asNondeterministic(): UserDefinedFunction = { - if (!_deterministic) { + override def asNondeterministic(): UserDefinedFunction = { + if (!deterministic) { this } else { - val udf = copyAll() - udf._deterministic = false - udf + copy(deterministic = false) } } } -// We have to use a name different than `UserDefinedFunction` here, to avoid breaking the binary -// compatibility of the auto-generate UserDefinedFunction object. private[sql] object SparkUserDefinedFunction { def create( @@ -157,8 +149,7 @@ private[sql] object SparkUserDefinedFunction { } else { Some(inputSchemas.map(_.get.dataType)) } - val udf = new UserDefinedFunction(f, dataType, inputTypes) - udf.nullableTypes = Some(inputSchemas.map(_.map(_.nullable).getOrElse(true))) - udf + val nullableTypes = Some(inputSchemas.map(_.map(_.nullable).getOrElse(true))) + SparkUserDefinedFunction(f, dataType, inputTypes, nullableTypes) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index efa8f8526387..33186f778d86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -4259,7 +4259,7 @@ object functions { def udf(f: AnyRef, dataType: DataType): UserDefinedFunction = { // TODO: should call SparkUserDefinedFunction.create() instead but inputSchemas is currently // unavailable. We may need to create type-safe overloaded versions of udf() methods. - new UserDefinedFunction(f, dataType, inputTypes = None) + SparkUserDefinedFunction(f, dataType, inputTypes = None, nullableTypes = None) } /** From 031bd80e4f943d64a1171856978022fac320de5d Mon Sep 17 00:00:00 2001 From: lichaoqun Date: Sun, 2 Dec 2018 10:55:17 +0800 Subject: [PATCH 0046/1072] [SPARK-26195][SQL] Correct exception messages in some classes ## What changes were proposed in this pull request? UnsupportedOperationException messages are not the same with method name.This PR correct these messages. ## How was this patch tested? 
NA Closes #23154 from lcqzte10192193/wid-lcq-1127. Authored-by: lichaoqun Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/catalyst/analysis/unresolved.scala | 4 ++-- .../apache/spark/sql/catalyst/expressions/Expression.scala | 2 +- .../apache/spark/sql/catalyst/expressions/generators.scala | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 36cad3cf7478..d44b42134f86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -204,10 +204,10 @@ case class UnresolvedGenerator(name: FunctionIdentifier, children: Seq[Expressio throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = - throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") + throw new UnsupportedOperationException(s"Cannot generate code for expression: $this") override def terminate(): TraversableOnce[InternalRow] = - throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") + throw new UnsupportedOperationException(s"Cannot terminate expression: $this") } case class UnresolvedFunction( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 2ecec61adb0a..c89c2272be75 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -288,7 +288,7 @@ trait Unevaluable extends Expression { throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") final override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = - throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") + throw new UnsupportedOperationException(s"Cannot generate code for expression: $this") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index d6e67b9ac3d1..9c74fdf6c9a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -258,7 +258,7 @@ case class GeneratorOuter(child: Generator) extends UnaryExpression with Generat throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") final override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = - throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") + throw new UnsupportedOperationException(s"Cannot generate code for expression: $this") override def elementSchema: StructType = child.elementSchema From c7d95ccedf593edf9fda9ecaf8d0b4dda451440d Mon Sep 17 00:00:00 2001 From: Koert Kuipers Date: Sun, 2 Dec 2018 17:38:25 +0800 Subject: [PATCH 0047/1072] [SPARK-26208][SQL] add headers to empty csv files when header=true ## What changes were proposed in this pull request? Add headers to empty csv files when header=true, because otherwise these files are invalid when reading. 
## How was this patch tested? Added test for roundtrip of empty dataframe to csv file with headers and back in CSVSuite Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23173 from koertkuipers/feat-empty-csv-with-header. Authored-by: Koert Kuipers Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/csv/UnivocityGenerator.scala | 9 ++++---- .../datasources/csv/CSVFileFormat.scala | 22 ++++++++++++------- .../execution/datasources/csv/CSVSuite.scala | 13 +++++++++++ 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala index 1218f9242afe..2ab376c0ac20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala @@ -32,7 +32,6 @@ class UnivocityGenerator( private val writerSettings = options.asWriterSettings writerSettings.setHeaders(schema.fieldNames: _*) private val gen = new CsvWriter(writer, writerSettings) - private var printHeader = options.headerFlag // A `ValueConverter` is responsible for converting a value of an `InternalRow` to `String`. // When the value is null, this converter should not be called. @@ -72,15 +71,15 @@ class UnivocityGenerator( values } + def writeHeaders(): Unit = { + gen.writeHeaders() + } + /** * Writes a single InternalRow to CSV using Univocity. */ def write(row: InternalRow): Unit = { - if (printHeader) { - gen.writeHeaders() - } gen.writeRow(convertRow(row): _*) - printHeader = false } def writeToString(row: InternalRow): String = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala index 4c5a1d327023..f7d8a9e1042d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala @@ -171,15 +171,21 @@ private[csv] class CsvOutputWriter( private var univocityGenerator: Option[UnivocityGenerator] = None - override def write(row: InternalRow): Unit = { - val gen = univocityGenerator.getOrElse { - val charset = Charset.forName(params.charset) - val os = CodecStreams.createOutputStreamWriter(context, new Path(path), charset) - val newGen = new UnivocityGenerator(dataSchema, os, params) - univocityGenerator = Some(newGen) - newGen - } + if (params.headerFlag) { + val gen = getGen() + gen.writeHeaders() + } + private def getGen(): UnivocityGenerator = univocityGenerator.getOrElse { + val charset = Charset.forName(params.charset) + val os = CodecStreams.createOutputStreamWriter(context, new Path(path), charset) + val newGen = new UnivocityGenerator(dataSchema, os, params) + univocityGenerator = Some(newGen) + newGen + } + + override def write(row: InternalRow): Unit = { + val gen = getGen() gen.write(row) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index e14e8d49db5c..bc950f2418d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -1987,6 +1987,19 @@ 
class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te assert(errMsg2.contains("'lineSep' can contain only 1 character")) } + test("SPARK-26208: write and read empty data to csv file with headers") { + withTempPath { path => + val df1 = spark.range(10).repartition(2).filter(_ < 0).map(_.toString).toDF + // we have 2 partitions but they are both empty and will be filtered out upon writing + // thanks to SPARK-23271 one new empty partition will be inserted + df1.write.format("csv").option("header", true).save(path.getAbsolutePath) + val df2 = spark.read.format("csv").option("header", true).option("inferSchema", false) + .load(path.getAbsolutePath) + assert(df1.schema === df2.schema) + checkAnswer(df1, df2) + } + } + test("do not produce empty files for empty partitions") { withTempPath { dir => val path = dir.getCanonicalPath From 9cda9a892d03f60a76cd5d9b4546e72c50962c85 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 2 Dec 2018 17:41:08 +0800 Subject: [PATCH 0048/1072] [SPARK-26080][PYTHON] Skips Python resource limit on Windows in Python worker ## What changes were proposed in this pull request? `resource` package is a Unix specific package. See https://docs.python.org/2/library/resource.html and https://docs.python.org/3/library/resource.html. Note that we document Windows support: > Spark runs on both Windows and UNIX-like systems (e.g. Linux, Mac OS). This should be backported into branch-2.4 to restore Windows support in Spark 2.4.1. ## How was this patch tested? Manually mocking the changed logics. Closes #23055 from HyukjinKwon/SPARK-26080. Lead-authored-by: hyukjinkwon Co-authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- docs/configuration.md | 2 ++ python/pyspark/worker.py | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 8914bd0310f9..9abbb3f63490 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -190,6 +190,8 @@ of the most common options to set are: and it is up to the application to avoid exceeding the overhead memory space shared with other non-JVM processes. When PySpark is run in YARN or Kubernetes, this memory is added to executor resource requests. + + NOTE: Python memory usage may not be limited on platforms that do not support resource limiting, such as Windows.
    diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 8c59f1f999f1..953b468e9651 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -22,7 +22,12 @@ import os import sys import time -import resource +# 'resource' is a Unix specific module. +has_resource_module = True +try: + import resource +except ImportError: + has_resource_module = False import socket import traceback @@ -268,9 +273,9 @@ def main(infile, outfile): # set up memory limits memory_limit_mb = int(os.environ.get('PYSPARK_EXECUTOR_MEMORY_MB', "-1")) - total_memory = resource.RLIMIT_AS - try: - if memory_limit_mb > 0: + if memory_limit_mb > 0 and has_resource_module: + total_memory = resource.RLIMIT_AS + try: (soft_limit, hard_limit) = resource.getrlimit(total_memory) msg = "Current mem limits: {0} of max {1}\n".format(soft_limit, hard_limit) print(msg, file=sys.stderr) @@ -283,9 +288,9 @@ def main(infile, outfile): print(msg, file=sys.stderr) resource.setrlimit(total_memory, (new_limit, new_limit)) - except (resource.error, OSError, ValueError) as e: - # not all systems support resource limits, so warn instead of failing - print("WARN: Failed to set memory limit: {0}\n".format(e), file=sys.stderr) + except (resource.error, OSError, ValueError) as e: + # not all systems support resource limits, so warn instead of failing + print("WARN: Failed to set memory limit: {0}\n".format(e), file=sys.stderr) # initialize global state taskContext = None From 676bbb2446af1f281b8f76a5428b7ba75b7588b3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 2 Dec 2018 08:52:01 -0600 Subject: [PATCH 0049/1072] [SPARK-26198][SQL] Fix Metadata serialize null values throw NPE ## What changes were proposed in this pull request? How to reproduce this issue: ```scala scala> val meta = new org.apache.spark.sql.types.MetadataBuilder().putNull("key").build().json java.lang.NullPointerException at org.apache.spark.sql.types.Metadata$.org$apache$spark$sql$types$Metadata$$toJsonValue(Metadata.scala:196) at org.apache.spark.sql.types.Metadata$$anonfun$1.apply(Metadata.scala:180) ``` This pr fix `NullPointerException` when `Metadata` serialize `null` values. ## How was this patch tested? unit tests Closes #23164 from wangyum/SPARK-26198. 
Authored-by: Yuming Wang Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/sql/types/Metadata.scala | 2 ++ .../scala/org/apache/spark/sql/types/MetadataSuite.scala | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala index 4979aced145c..b6a859b75c37 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala @@ -190,6 +190,8 @@ object Metadata { JBool(x) case x: String => JString(x) + case null => + JNull case x: Metadata => toJsonValue(x.map) case other => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/MetadataSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/MetadataSuite.scala index 210e65708170..b4aeac562d2b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/MetadataSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/MetadataSuite.scala @@ -26,6 +26,7 @@ class MetadataSuite extends SparkFunSuite { assert(meta.## !== 0) assert(meta.getString("key") === "value") assert(meta.contains("key")) + assert(meta === Metadata.fromJson(meta.json)) intercept[NoSuchElementException](meta.getString("no_such_key")) intercept[ClassCastException](meta.getBoolean("key")) } @@ -36,6 +37,7 @@ class MetadataSuite extends SparkFunSuite { assert(meta.## !== 0) assert(meta.getLong("key") === 12) assert(meta.contains("key")) + assert(meta === Metadata.fromJson(meta.json)) intercept[NoSuchElementException](meta.getLong("no_such_key")) intercept[ClassCastException](meta.getBoolean("key")) } @@ -46,6 +48,7 @@ class MetadataSuite extends SparkFunSuite { assert(meta.## !== 0) assert(meta.getDouble("key") === 12) assert(meta.contains("key")) + assert(meta === Metadata.fromJson(meta.json)) intercept[NoSuchElementException](meta.getDouble("no_such_key")) intercept[ClassCastException](meta.getBoolean("key")) } @@ -56,6 +59,7 @@ class MetadataSuite extends SparkFunSuite { assert(meta.## !== 0) assert(meta.getBoolean("key") === true) assert(meta.contains("key")) + assert(meta === Metadata.fromJson(meta.json)) intercept[NoSuchElementException](meta.getBoolean("no_such_key")) intercept[ClassCastException](meta.getString("key")) } @@ -69,6 +73,7 @@ class MetadataSuite extends SparkFunSuite { assert(meta.getLong("key") === 0) assert(meta.getBoolean("key") === false) assert(meta.contains("key")) + assert(meta === Metadata.fromJson(meta.json)) intercept[NoSuchElementException](meta.getLong("no_such_key")) } } From bfa3d32f7719cd4bfb2c161fe4a6bd3eea148158 Mon Sep 17 00:00:00 2001 From: caoxuewen Date: Mon, 3 Dec 2018 16:18:22 +0800 Subject: [PATCH 0050/1072] [SPARK-26117][FOLLOW-UP][SQL] throw SparkOutOfMemoryError intead of SparkException in UnsafeHashedRelation ## What changes were proposed in this pull request? When build hash Map with one row of data and run out of memory, we should throw a SparkOutOfMemoryError exception, which is more accurate than SparkException. this PR fix it. ## How was this patch tested? N / A Closes #23190 from heary-cao/throwUnsafeHashedRelation. 
Authored-by: caoxuewen Signed-off-by: Wenchen Fan --- .../apache/spark/sql/execution/joins/HashedRelation.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index 86eb47a70f1a..e8c01d46a84c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -24,7 +24,7 @@ import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.spark.{SparkConf, SparkEnv, SparkException} import org.apache.spark.internal.config.MEMORY_OFFHEAP_ENABLED -import org.apache.spark.memory.{MemoryConsumer, StaticMemoryManager, TaskMemoryManager} +import org.apache.spark.memory._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.BroadcastMode @@ -316,7 +316,9 @@ private[joins] object UnsafeHashedRelation { row.getBaseObject, row.getBaseOffset, row.getSizeInBytes) if (!success) { binaryMap.free() - throw new SparkException("There is no enough memory to build hash map") + // scalastyle:off throwerror + throw new SparkOutOfMemoryError("There is no enough memory to build hash map") + // scalastyle:on throwerror } } } From 11e5f1bcd49eec8ab4225d6e68a051b5c6a21cb2 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 3 Dec 2018 18:25:38 +0800 Subject: [PATCH 0051/1072] [SPARK-26151][SQL] Return partial results for bad CSV records ## What changes were proposed in this pull request? In the PR, I propose to change behaviour of `UnivocityParser` and `FailureSafeParser`, and return all fields that were parsed and converted to expected types successfully instead of just returning a row with all `null`s for a bad input in the `PERMISSIVE` mode. For example, for CSV line `0,2013-111-11 12:13:14` and DDL schema `a int, b timestamp`, new result is `Row(0, null)`. ## How was this patch tested? It was checked by existing tests from `CsvSuite` and `CsvFunctionsSuite`. Closes #23120 from MaxGekk/failuresafe-partial-result. Authored-by: Maxim Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/csv/UnivocityParser.scala | 27 ++++++++++--------- .../sql/catalyst/util/FailureSafeParser.scala | 21 ++++++--------- .../apache/spark/sql/CsvFunctionsSuite.scala | 2 +- .../execution/datasources/csv/CSVSuite.scala | 6 ++--- 4 files changed, 27 insertions(+), 29 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index 85e129224c91..8fff4b0781b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -243,21 +243,24 @@ class UnivocityParser( () => getPartialResult(), new RuntimeException("Malformed CSV record")) } else { - try { - // When the length of the returned tokens is identical to the length of the parsed schema, - // we just need to convert the tokens that correspond to the required columns. - var i = 0 - while (i < requiredSchema.length) { + // When the length of the returned tokens is identical to the length of the parsed schema, + // we just need to convert the tokens that correspond to the required columns. 
+ var badRecordException: Option[Throwable] = None + var i = 0 + while (i < requiredSchema.length) { + try { row(i) = valueConverters(i).apply(getToken(tokens, i)) - i += 1 + } catch { + case NonFatal(e) => + badRecordException = badRecordException.orElse(Some(e)) } + i += 1 + } + + if (badRecordException.isEmpty) { row - } catch { - case NonFatal(e) => - // For corrupted records with the number of tokens same as the schema, - // CSV reader doesn't support partial results. All fields other than the field - // configured by `columnNameOfCorruptRecord` are set to `null`. - throw BadRecordException(() => getCurrentInput, () => None, e) + } else { + throw BadRecordException(() => getCurrentInput, () => Some(row), badRecordException.get) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala index 76745b11c84c..4baf052bfe56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala @@ -33,26 +33,21 @@ class FailureSafeParser[IN]( private val corruptFieldIndex = schema.getFieldIndex(columnNameOfCorruptRecord) private val actualSchema = StructType(schema.filterNot(_.name == columnNameOfCorruptRecord)) private val resultRow = new GenericInternalRow(schema.length) - private val nullResult = new GenericInternalRow(schema.length) // This function takes 2 parameters: an optional partial result, and the bad record. If the given // schema doesn't contain a field for corrupted record, we just return the partial result or a // row with all fields null. If the given schema contains a field for corrupted record, we will // set the bad record to this field, and set other fields according to the partial result or null. 
private val toResultRow: (Option[InternalRow], () => UTF8String) => InternalRow = { - if (corruptFieldIndex.isDefined) { - (row, badRecord) => { - var i = 0 - while (i < actualSchema.length) { - val from = actualSchema(i) - resultRow(schema.fieldIndex(from.name)) = row.map(_.get(i, from.dataType)).orNull - i += 1 - } - resultRow(corruptFieldIndex.get) = badRecord() - resultRow + (row, badRecord) => { + var i = 0 + while (i < actualSchema.length) { + val from = actualSchema(i) + resultRow(schema.fieldIndex(from.name)) = row.map(_.get(i, from.dataType)).orNull + i += 1 } - } else { - (row, _) => row.getOrElse(nullResult) + corruptFieldIndex.foreach(index => resultRow(index) = badRecord()) + resultRow } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala index 1c359ce1d201..537d13b1bc8d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala @@ -60,7 +60,7 @@ class CsvFunctionsSuite extends QueryTest with SharedSQLContext { "mode" -> "Permissive", "columnNameOfCorruptRecord" -> columnNameOfCorruptRecord))) checkAnswer(df2, Seq( - Row(Row(null, null, "0,2013-111-11 12:13:14")), + Row(Row(0, null, "0,2013-111-11 12:13:14")), Row(Row(1, java.sql.Date.valueOf("1983-08-04"), null)))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index bc950f2418d3..c9273193b642 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -1116,7 +1116,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te .schema(schema) .csv(testFile(valueMalformedFile)) checkAnswer(df1, - Row(null, null) :: + Row(0, null) :: Row(1, java.sql.Date.valueOf("1983-08-04")) :: Nil) @@ -1131,7 +1131,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te .schema(schemaWithCorrField1) .csv(testFile(valueMalformedFile)) checkAnswer(df2, - Row(null, null, "0,2013-111-11 12:13:14") :: + Row(0, null, "0,2013-111-11 12:13:14") :: Row(1, java.sql.Date.valueOf("1983-08-04"), null) :: Nil) @@ -1148,7 +1148,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te .schema(schemaWithCorrField2) .csv(testFile(valueMalformedFile)) checkAnswer(df3, - Row(null, "0,2013-111-11 12:13:14", null) :: + Row(0, "0,2013-111-11 12:13:14", null) :: Row(1, null, java.sql.Date.valueOf("1983-08-04")) :: Nil) From b569ba53f4b650c03bd11def7c7f7589ceff61eb Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 3 Dec 2018 19:53:45 +0800 Subject: [PATCH 0052/1072] [SPARK-26230][SQL] FileIndex: if case sensitive, validate partitions with original column names ## What changes were proposed in this pull request? Partition column name is required to be unique under the same directory. The following paths are invalid partitioned directory: ``` hdfs://host:9000/path/a=1 hdfs://host:9000/path/b=2 ``` If case sensitive, the following paths should be invalid too: ``` hdfs://host:9000/path/a=1 hdfs://host:9000/path/A=2 ``` Since column 'a' and 'A' are different, and it is wrong to use either one as the column name in partition schema. Also, there is a `TODO` comment in the code. 
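For a concrete picture (a hypothetical reproduction sketch, not taken from this patch — the temp path, the use of Parquet, the `spark` session handle, and the exact error wording are assumptions), the problematic layout can be produced like this:

```scala
// Assuming a running SparkSession named `spark` (e.g. in spark-shell).
// Two sibling directories whose partition-column names differ only in case.
// With spark.sql.caseSensitive=true, 'a' and 'A' are different column names, so the layout
// below should be rejected during partition discovery rather than silently merged.
spark.conf.set("spark.sql.caseSensitive", "true")

spark.range(1).write.parquet("/tmp/case_sensitive_tbl/a=1")
spark.range(1).write.parquet("/tmp/case_sensitive_tbl/A=2")

// After this change, reading the root path is expected to fail with a
// "Conflicting partition column names detected" style error.
spark.read.parquet("/tmp/case_sensitive_tbl").show()
```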
Currently the Spark doesn't validate such case when `CASE_SENSITIVE` enabled. This PR is to resolve the problem. ## How was this patch tested? Add unit test Closes #23186 from gengliangwang/SPARK-26230. Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../datasources/PartitioningUtils.scala | 14 +++++--- .../datasources/FileIndexSuite.scala | 32 ++++++++++++++++++- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 9d2c9ba0c1a5..d66cb09bda0c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -155,7 +155,8 @@ object PartitioningUtils { "root directory of the table. If there are multiple root directories, " + "please load them separately and then union them.") - val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues, timeZone) + val resolvedPartitionValues = + resolvePartitions(pathsWithPartitionValues, caseSensitive, timeZone) // Creates the StructType which represents the partition columns. val fields = { @@ -345,15 +346,18 @@ object PartitioningUtils { */ def resolvePartitions( pathsWithPartitionValues: Seq[(Path, PartitionValues)], + caseSensitive: Boolean, timeZone: TimeZone): Seq[PartitionValues] = { if (pathsWithPartitionValues.isEmpty) { Seq.empty } else { - // TODO: Selective case sensitivity. - val distinctPartColNames = - pathsWithPartitionValues.map(_._2.columnNames.map(_.toLowerCase())).distinct + val partColNames = if (caseSensitive) { + pathsWithPartitionValues.map(_._2.columnNames) + } else { + pathsWithPartitionValues.map(_._2.columnNames.map(_.toLowerCase())) + } assert( - distinctPartColNames.size == 1, + partColNames.distinct.size == 1, listConflictingPartitionColumns(pathsWithPartitionValues)) // Resolves possible type conflicts for each column diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala index fdb0511f01a2..ec552f7ddf47 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala @@ -52,7 +52,7 @@ class FileIndexSuite extends SharedSQLContext { test("SPARK-26188: don't infer data types of partition columns if user specifies schema") { withTempDir { dir => - val partitionDirectory = new File(dir, s"a=4d") + val partitionDirectory = new File(dir, "a=4d") partitionDirectory.mkdir() val file = new File(partitionDirectory, "text.txt") stringToFile(file, "text") @@ -65,6 +65,36 @@ class FileIndexSuite extends SharedSQLContext { } } + test("SPARK-26230: if case sensitive, validate partitions with original column names") { + withTempDir { dir => + val partitionDirectory = new File(dir, "a=1") + partitionDirectory.mkdir() + val file = new File(partitionDirectory, "text.txt") + stringToFile(file, "text") + val partitionDirectory2 = new File(dir, "A=2") + partitionDirectory2.mkdir() + val file2 = new File(partitionDirectory2, "text.txt") + stringToFile(file2, "text") + val path = new Path(dir.getCanonicalPath) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + val fileIndex = new InMemoryFileIndex(spark, Seq(path), 
Map.empty, None) + val partitionValues = fileIndex.partitionSpec().partitions.map(_.values) + assert(partitionValues.length == 2) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val msg = intercept[AssertionError] { + val fileIndex = new InMemoryFileIndex(spark, Seq(path), Map.empty, None) + fileIndex.partitionSpec() + }.getMessage + assert(msg.contains("Conflicting partition column names detected")) + assert("Partition column name list #[0-1]: A".r.findFirstIn(msg).isDefined) + assert("Partition column name list #[0-1]: a".r.findFirstIn(msg).isDefined) + } + } + } + test("InMemoryFileIndex: input paths are converted to qualified paths") { withTempDir { dir => val file = new File(dir, "text.txt") From eebb940edb0c89eef88e1deb0fc0ae1c7a24bc3d Mon Sep 17 00:00:00 2001 From: pgandhi Date: Mon, 3 Dec 2018 07:53:21 -0600 Subject: [PATCH 0053/1072] [SPARK-26253][WEBUI] Task Summary Metrics Table on Stage Page shows empty table when no data is present Task Summary Metrics Table on Stage Page shows empty table when no data is present instead of showing a message. ## What changes were proposed in this pull request? Added a custom message to show on the task summary metrics table as well as executor summary table when no data is present. ## How was this patch tested? **Before:** ![49335550-29277d00-f615-11e8-8e62-a953e76bcebf](https://user-images.githubusercontent.com/22228190/49361520-425a2780-f702-11e8-8df4-08862ab6ceb8.png) **After:** screen shot 2018-12-03 at 1 56 09 pm Closes #23205 from pgandhi999/SPARK-26253. Authored-by: pgandhi Signed-off-by: Sean Owen --- .../resources/org/apache/spark/ui/static/stagepage.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 4c83ec7e95ab..564467487e84 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -221,7 +221,10 @@ function createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTable) { "searching": false, "order": [[0, "asc"]], "bSort": false, - "bAutoWidth": false + "bAutoWidth": false, + "oLanguage": { + "sEmptyTable": "No tasks have reported metrics yet" + } }; taskSummaryMetricsDataTable = $(taskMetricsTable).DataTable(taskConf); } @@ -426,7 +429,10 @@ $(document).ready(function () { } ], "order": [[0, "asc"]], - "bAutoWidth": false + "bAutoWidth": false, + "oLanguage": { + "sEmptyTable": "No data to show yet" + } } var executorSummaryTableSelector = $("#summary-executor-table").DataTable(executorSummaryConf); From 8534d753ecb21ea64ffbaefb5eaca38ba0464c6d Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Mon, 3 Dec 2018 23:54:26 +0800 Subject: [PATCH 0054/1072] [SPARK-26181][SQL] the `hasMinMaxStats` method of `ColumnStatsMap` is not correct ## What changes were proposed in this pull request? For now the `hasMinMaxStats` will return the same as `hasCountStats`, which is obviously not as expected. ## How was this patch tested? Existing tests. Closes #23152 from adrian-wang/minmaxstats. 
Authored-by: Daoyuan Wang Signed-off-by: Wenchen Fan --- .../statsEstimation/FilterEstimation.scala | 14 +++++++--- .../FilterEstimationSuite.scala | 27 +++++++++++++++++++ .../sql/hive/execution/SQLQuerySuite.scala | 14 ++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala index 5a3eeefaedb1..2c5beef43f52 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala @@ -670,6 +670,14 @@ case class FilterEstimation(plan: Filter) extends Logging { logDebug("[CBO] No range comparison statistics for String/Binary type " + attrLeft) return None case _ => + if (!colStatsMap.hasMinMaxStats(attrLeft)) { + logDebug("[CBO] No min/max statistics for " + attrLeft) + return None + } + if (!colStatsMap.hasMinMaxStats(attrRight)) { + logDebug("[CBO] No min/max statistics for " + attrRight) + return None + } } val colStatLeft = colStatsMap(attrLeft) @@ -879,13 +887,13 @@ case class ColumnStatsMap(originalMap: AttributeMap[ColumnStat]) { } def hasCountStats(a: Attribute): Boolean = - get(a).map(_.hasCountStats).getOrElse(false) + get(a).exists(_.hasCountStats) def hasDistinctCount(a: Attribute): Boolean = - get(a).map(_.distinctCount.isDefined).getOrElse(false) + get(a).exists(_.distinctCount.isDefined) def hasMinMaxStats(a: Attribute): Boolean = - get(a).map(_.hasCountStats).getOrElse(false) + get(a).exists(_.hasMinMaxStats) /** * Gets column stat for the given attribute. 
Prefer the column stat in updatedMap than that in diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala index 47bfa6256958..b0a47e783512 100755 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.LeftOuter import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.{ColumnStatsMap, FilterEstimation} import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ @@ -821,6 +822,32 @@ class FilterEstimationSuite extends StatsEstimationTestBase { expectedRowCount = 3) } + test("ColumnStatsMap tests") { + val attrNoDistinct = AttributeReference("att_without_distinct", IntegerType)() + val attrNoCount = AttributeReference("att_without_count", BooleanType)() + val attrNoMinMax = AttributeReference("att_without_min_max", DateType)() + val colStatNoDistinct = ColumnStat(distinctCount = None, min = Some(1), max = Some(10), + nullCount = Some(0), avgLen = Some(4), maxLen = Some(4)) + val colStatNoCount = ColumnStat(distinctCount = Some(2), min = Some(false), max = Some(true), + nullCount = None, avgLen = Some(1), maxLen = Some(1)) + val colStatNoMinMax = ColumnStat(distinctCount = Some(1), min = None, max = None, + nullCount = Some(1), avgLen = None, maxLen = None) + val columnStatsMap = ColumnStatsMap(AttributeMap(Seq( + attrNoDistinct -> colStatNoDistinct, + attrNoCount -> colStatNoCount, + attrNoMinMax -> colStatNoMinMax + ))) + assert(!columnStatsMap.hasDistinctCount(attrNoDistinct)) + assert(columnStatsMap.hasDistinctCount(attrNoCount)) + assert(columnStatsMap.hasDistinctCount(attrNoMinMax)) + assert(!columnStatsMap.hasCountStats(attrNoDistinct)) + assert(!columnStatsMap.hasCountStats(attrNoCount)) + assert(columnStatsMap.hasCountStats(attrNoMinMax)) + assert(columnStatsMap.hasMinMaxStats(attrNoDistinct)) + assert(columnStatsMap.hasMinMaxStats(attrNoCount)) + assert(!columnStatsMap.hasMinMaxStats(attrNoMinMax)) + } + private def childStatsTestPlan(outList: Seq[Attribute], tableRowCount: BigInt): StatsTestPlan = { StatsTestPlan( outputList = outList, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index dfcde8cc0d39..fab2a27cdef1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2276,4 +2276,18 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } + + test("SPARK-26181 hasMinMaxStats method of ColumnStatsMap is not correct") { + withSQLConf(SQLConf.CBO_ENABLED.key -> "true") { + withTable("all_null") { + sql("create table all_null (attr1 int, attr2 int)") + sql("insert into all_null values (null, null)") + sql("analyze table all_null compute statistics for columns attr1, attr2") + // check if the stats can be calculated 
without Cast exception. + sql("select * from all_null where attr1 < 1").queryExecution.stringWithStats + sql("select * from all_null where attr1 < attr2").queryExecution.stringWithStats + } + } + } + } From 6e4e70fe7bc3e103b8538748511261bb43cf3548 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 3 Dec 2018 10:02:15 -0600 Subject: [PATCH 0055/1072] [SPARK-26235][CORE] Change log level for ClassNotFoundException/NoClassDefFoundError in SparkSubmit to Error ## What changes were proposed in this pull request? In my local setup, I set log4j root category as ERROR (https://stackoverflow.com/questions/27781187/how-to-stop-info-messages-displaying-on-spark-console , first item show up if we google search "set spark log level".) When I run such command ``` spark-submit --class foo bar.jar ``` Nothing shows up, and the script exits. After quick investigation, I think the log level for ClassNotFoundException/NoClassDefFoundError in SparkSubmit should be ERROR instead of WARN. Since the whole process exit because of the exception/error. Before https://github.com/apache/spark/pull/20925, the message is not controlled by `log4j.rootCategory`. ## How was this patch tested? Manual check. Closes #23189 from gengliangwang/changeLogLevel. Authored-by: Gengliang Wang Signed-off-by: Sean Owen --- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 324f6f8894d3..d4055cb6c585 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -813,14 +813,14 @@ private[spark] class SparkSubmit extends Logging { mainClass = Utils.classForName(childMainClass) } catch { case e: ClassNotFoundException => - logWarning(s"Failed to load $childMainClass.", e) + logError(s"Failed to load class $childMainClass.") if (childMainClass.contains("thriftserver")) { logInfo(s"Failed to load main class $childMainClass.") logInfo("You need to build Spark with -Phive and -Phive-thriftserver.") } throw new SparkUserAppException(CLASS_NOT_FOUND_EXIT_STATUS) case e: NoClassDefFoundError => - logWarning(s"Failed to load $childMainClass: ${e.getMessage()}") + logError(s"Failed to load $childMainClass: ${e.getMessage()}") if (e.getMessage.contains("org/apache/hadoop/hive")) { logInfo(s"Failed to load hive class.") logInfo("You need to build Spark with -Phive and -Phive-thriftserver.") @@ -915,6 +915,8 @@ object SparkSubmit extends CommandLineUtils with Logging { override protected def logInfo(msg: => String): Unit = self.logInfo(msg) override protected def logWarning(msg: => String): Unit = self.logWarning(msg) + + override protected def logError(msg: => String): Unit = self.logError(msg) } } @@ -922,6 +924,8 @@ object SparkSubmit extends CommandLineUtils with Logging { override protected def logWarning(msg: => String): Unit = printMessage(s"Warning: $msg") + override protected def logError(msg: => String): Unit = printMessage(s"Error: $msg") + override def doSubmit(args: Array[String]): Unit = { try { super.doSubmit(args) From 5e5b9f2ee0b4d8470197b404906fbd245c28f8ac Mon Sep 17 00:00:00 2001 From: cody koeninger Date: Mon, 3 Dec 2018 10:03:51 -0600 Subject: [PATCH 0056/1072] [SPARK-26177] Config change followup to [] Automated formatting for Scala code Let's keep this open for a while to see if other configuration tweaks are suggested ## What 
changes were proposed in this pull request? Formatting configuration changes following up https://github.com/apache/spark/pull/23148 ## How was this patch tested? ./dev/scalafmt Closes #23182 from koeninger/scalafmt-config. Authored-by: cody koeninger Signed-off-by: Sean Owen --- dev/.scalafmt.conf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev/.scalafmt.conf b/dev/.scalafmt.conf index def67e026982..9a8813e3b3ee 100644 --- a/dev/.scalafmt.conf +++ b/dev/.scalafmt.conf @@ -19,6 +19,10 @@ align = none align.openParenDefnSite = false align.openParenCallSite = false align.tokens = [] +optIn = { + configStyleArguments = false +} +danglingParentheses = false docstrings = JavaDoc maxColumn = 98 From 04046e5432acb1132fa567f2230723bc1a92a482 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Tue, 4 Dec 2018 00:05:15 +0800 Subject: [PATCH 0057/1072] [SPARK-25498][SQL] InterpretedMutableProjection should handle UnsafeRow ## What changes were proposed in this pull request? Since `AggregationIterator` uses `MutableProjection` for `UnsafeRow`, `InterpretedMutableProjection` needs to handle `UnsafeRow` as buffer internally for fixed-length types only. ## How was this patch tested? Run 'SQLQueryTestSuite' with the interpreted mode. Closes #22512 from maropu/InterpreterTest. Authored-by: Takeshi Yamamuro Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/InternalRow.scala | 22 +++++ .../InterpretedMutableProjection.scala | 23 +++++- .../expressions/ExpressionEvalHelper.scala | 11 +++ .../expressions/MutableProjectionSuite.scala | 81 +++++++++++++++++++ .../expressions/UnsafeRowConverterSuite.scala | 15 +--- .../sql-tests/inputs/change-column.sql | 1 + .../test/resources/sql-tests/inputs/udaf.sql | 3 + .../sql-tests/results/change-column.sql.out | 10 ++- .../resources/sql-tests/results/udaf.sql.out | 18 ++++- .../apache/spark/sql/SQLQueryTestSuite.scala | 27 ++++++- 10 files changed, 192 insertions(+), 19 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala index e49c10be6be4..bdab407688a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala @@ -157,4 +157,26 @@ object InternalRow { getValueNullSafe } } + + /** + * Returns a writer for an `InternalRow` with given data type. 
+ */ + def getWriter(ordinal: Int, dt: DataType): (InternalRow, Any) => Unit = dt match { + case BooleanType => (input, v) => input.setBoolean(ordinal, v.asInstanceOf[Boolean]) + case ByteType => (input, v) => input.setByte(ordinal, v.asInstanceOf[Byte]) + case ShortType => (input, v) => input.setShort(ordinal, v.asInstanceOf[Short]) + case IntegerType | DateType => (input, v) => input.setInt(ordinal, v.asInstanceOf[Int]) + case LongType | TimestampType => (input, v) => input.setLong(ordinal, v.asInstanceOf[Long]) + case FloatType => (input, v) => input.setFloat(ordinal, v.asInstanceOf[Float]) + case DoubleType => (input, v) => input.setDouble(ordinal, v.asInstanceOf[Double]) + case DecimalType.Fixed(precision, _) => + (input, v) => input.setDecimal(ordinal, v.asInstanceOf[Decimal], precision) + case udt: UserDefinedType[_] => getWriter(ordinal, udt.sqlType) + case NullType => (input, _) => input.setNullAt(ordinal) + case StringType => (input, v) => input.update(ordinal, v.asInstanceOf[UTF8String].copy()) + case _: StructType => (input, v) => input.update(ordinal, v.asInstanceOf[InternalRow].copy()) + case _: ArrayType => (input, v) => input.update(ordinal, v.asInstanceOf[ArrayData].copy()) + case _: MapType => (input, v) => input.update(ordinal, v.asInstanceOf[MapData].copy()) + case _ => (input, v) => input.update(ordinal, v) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala index 0654108cea28..122a564da61b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala @@ -49,10 +49,31 @@ class InterpretedMutableProjection(expressions: Seq[Expression]) extends Mutable def currentValue: InternalRow = mutableRow override def target(row: InternalRow): MutableProjection = { + // If `mutableRow` is `UnsafeRow`, `MutableProjection` accepts fixed-length types only + require(!row.isInstanceOf[UnsafeRow] || + validExprs.forall { case (e, _) => UnsafeRow.isFixedLength(e.dataType) }, + "MutableProjection cannot use UnsafeRow for output data types: " + + validExprs.map(_._1.dataType).filterNot(UnsafeRow.isFixedLength) + .map(_.catalogString).mkString(", ")) mutableRow = row this } + private[this] val fieldWriters: Array[Any => Unit] = validExprs.map { case (e, i) => + val writer = InternalRow.getWriter(i, e.dataType) + if (!e.nullable) { + (v: Any) => writer(mutableRow, v) + } else { + (v: Any) => { + if (v == null) { + mutableRow.setNullAt(i) + } else { + writer(mutableRow, v) + } + } + } + }.toArray + override def apply(input: InternalRow): InternalRow = { var i = 0 while (i < validExprs.length) { @@ -64,7 +85,7 @@ class InterpretedMutableProjection(expressions: Seq[Expression]) extends Mutable i = 0 while (i < validExprs.length) { val (_, ordinal) = validExprs(i) - mutableRow(ordinal) = buffer(ordinal) + fieldWriters(i)(buffer(ordinal)) i += 1 } mutableRow diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index eb33325d0b31..a7282e1b1cad 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -456,4 +456,15 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa diff < eps * math.min(absX, absY) } } + + def testBothCodegenAndInterpreted(name: String)(f: => Unit): Unit = { + val modes = Seq(CodegenObjectFactoryMode.CODEGEN_ONLY, CodegenObjectFactoryMode.NO_CODEGEN) + for (fallbackMode <- modes) { + test(s"$name with $fallbackMode") { + withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> fallbackMode.toString) { + f + } + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala new file mode 100644 index 000000000000..2db1c3b98819 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +class MutableProjectionSuite extends SparkFunSuite with ExpressionEvalHelper { + + val fixedLengthTypes = Array[DataType]( + BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, + DateType, TimestampType) + + val variableLengthTypes = Array( + StringType, DecimalType.defaultConcreteType, CalendarIntervalType, BinaryType, + ArrayType(StringType), MapType(IntegerType, StringType), + StructType.fromDDL("a INT, b STRING"), ObjectType(classOf[java.lang.Integer])) + + def createMutableProjection(dataTypes: Array[DataType]): MutableProjection = { + MutableProjection.create(dataTypes.zipWithIndex.map(x => BoundReference(x._2, x._1, true))) + } + + testBothCodegenAndInterpreted("fixed-length types") { + val inputRow = InternalRow.fromSeq(Seq(true, 3.toByte, 15.toShort, -83, 129L, 1.0f, 5.0, 1, 2L)) + val proj = createMutableProjection(fixedLengthTypes) + assert(proj(inputRow) === inputRow) + } + + testBothCodegenAndInterpreted("unsafe buffer") { + val inputRow = InternalRow.fromSeq(Seq(false, 1.toByte, 9.toShort, -18, 53L, 3.2f, 7.8, 4, 9L)) + val numBytes = UnsafeRow.calculateBitSetWidthInBytes(fixedLengthTypes.length) + val unsafeBuffer = UnsafeRow.createFromByteArray(numBytes, fixedLengthTypes.length) + val proj = createMutableProjection(fixedLengthTypes) + val projUnsafeRow = proj.target(unsafeBuffer)(inputRow) + assert(FromUnsafeProjection.apply(fixedLengthTypes)(projUnsafeRow) === inputRow) + } + + testBothCodegenAndInterpreted("variable-length types") { + val proj = createMutableProjection(variableLengthTypes) + val scalaValues = Seq("abc", BigDecimal(10), CalendarInterval.fromString("interval 1 day"), + Array[Byte](1, 2), Array("123", "456"), Map(1 -> "a", 2 -> "b"), Row(1, "a"), + new java.lang.Integer(5)) + val inputRow = InternalRow.fromSeq(scalaValues.zip(variableLengthTypes).map { + case (v, dataType) => CatalystTypeConverters.createToCatalystConverter(dataType)(v) + }) + val projRow = proj(inputRow) + variableLengthTypes.zipWithIndex.foreach { case (dataType, index) => + val toScala = CatalystTypeConverters.createToScalaConverter(dataType) + assert(toScala(projRow.get(index, dataType)) === toScala(inputRow.get(index, dataType))) + } + } + + test("unsupported types for unsafe buffer") { + withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) { + val proj = createMutableProjection(Array(StringType)) + val errMsg = intercept[IllegalArgumentException] { + proj.target(new UnsafeRow(1)) + }.getMessage + assert(errMsg.contains("MutableProjection cannot use UnsafeRow for output data types:")) + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala index 5a646d9a850a..268372b5d050 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala @@ -26,26 +26,15 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import 
org.apache.spark.sql.catalyst.plans.PlanTestBase import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{IntegerType, LongType, _} import org.apache.spark.unsafe.array.ByteArrayMethods import org.apache.spark.unsafe.types.UTF8String -class UnsafeRowConverterSuite extends SparkFunSuite with Matchers with PlanTestBase { +class UnsafeRowConverterSuite extends SparkFunSuite with Matchers with PlanTestBase + with ExpressionEvalHelper { private def roundedSize(size: Int) = ByteArrayMethods.roundNumberOfBytesToNearestWord(size) - private def testBothCodegenAndInterpreted(name: String)(f: => Unit): Unit = { - val modes = Seq(CodegenObjectFactoryMode.CODEGEN_ONLY, CodegenObjectFactoryMode.NO_CODEGEN) - for (fallbackMode <- modes) { - test(s"$name with $fallbackMode") { - withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> fallbackMode.toString) { - f - } - } - } - } - testBothCodegenAndInterpreted("basic conversion with only primitive types") { val factory = UnsafeProjection val fieldTypes: Array[DataType] = Array(LongType, LongType, IntegerType) diff --git a/sql/core/src/test/resources/sql-tests/inputs/change-column.sql b/sql/core/src/test/resources/sql-tests/inputs/change-column.sql index 2909024e4c9f..6f5ac221ce79 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/change-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/change-column.sql @@ -54,3 +54,4 @@ ALTER TABLE partition_table CHANGE COLUMN c c INT COMMENT 'this is column C'; -- DROP TEST TABLE DROP TABLE test_change; DROP TABLE partition_table; +DROP VIEW global_temp.global_temp_view; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf.sql index 2183ba23afc3..58613a1325df 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udaf.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udaf.sql @@ -11,3 +11,6 @@ SELECT default.myDoubleAvg(int_col1, 3) as my_avg from t1; CREATE FUNCTION udaf1 AS 'test.non.existent.udaf'; SELECT default.udaf1(int_col1) as udaf1 from t1; + +DROP FUNCTION myDoubleAvg; +DROP FUNCTION udaf1; diff --git a/sql/core/src/test/resources/sql-tests/results/change-column.sql.out b/sql/core/src/test/resources/sql-tests/results/change-column.sql.out index ff1ecbcc44c2..114617873af4 100644 --- a/sql/core/src/test/resources/sql-tests/results/change-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/change-column.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 33 +-- Number of queries: 34 -- !query 0 @@ -313,3 +313,11 @@ DROP TABLE partition_table struct<> -- !query 32 output + + +-- !query 33 +DROP VIEW global_temp.global_temp_view +-- !query 33 schema +struct<> +-- !query 33 output + diff --git a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out index 87824ab81cdf..f4455bb71757 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 6 +-- Number of queries: 8 -- !query 0 @@ -52,3 +52,19 @@ struct<> -- !query 5 output org.apache.spark.sql.AnalysisException Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; line 1 pos 7 + + +-- !query 6 +DROP FUNCTION myDoubleAvg +-- !query 6 schema +struct<> +-- 
!query 6 output + + + +-- !query 7 +DROP FUNCTION udaf1 +-- !query 7 schema +struct<> +-- !query 7 output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 6ca3ac596e5f..fd180ce2380a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -22,11 +22,13 @@ import java.util.{Locale, TimeZone} import scala.util.control.NonFatal +import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeTableCommand} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types.StructType @@ -140,6 +142,12 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { val input = fileToString(new File(testCase.inputFile)) val (comments, code) = input.split("\n").partition(_.startsWith("--")) + + // Runs all the tests on both codegen-only and interpreter modes + val codegenConfigSets = Array(CODEGEN_ONLY, NO_CODEGEN).map { + case codegenFactoryMode => + Array(SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactoryMode.toString) + } val configSets = { val configLines = comments.filter(_.startsWith("--SET")).map(_.substring(5)) val configs = configLines.map(_.split(",").map { confAndValue => @@ -148,12 +156,25 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { }) // When we are regenerating the golden files we don't need to run all the configs as they // all need to return the same result - if (regenerateGoldenFiles && configs.nonEmpty) { - configs.take(1) + if (regenerateGoldenFiles) { + if (configs.nonEmpty) { + configs.take(1) + } else { + Array.empty[Array[(String, String)]] + } } else { - configs + if (configs.nonEmpty) { + codegenConfigSets.flatMap { codegenConfig => + configs.map { config => + config ++ codegenConfig + } + } + } else { + codegenConfigSets + } } } + // List of SQL queries to run // note: this is not a robust way to split queries using semicolon, but works for now. val queries = code.mkString("\n").split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq From 0c2935b01def8a5f631851999d9c2d57b63763e6 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Mon, 3 Dec 2018 09:02:47 -0800 Subject: [PATCH 0058/1072] [SPARK-25515][K8S] Adds a config option to keep executor pods for debugging ## What changes were proposed in this pull request? Keeps K8s executor resources present if case of failure or normal termination. Introduces a new boolean config option: `spark.kubernetes.deleteExecutors`, with default value set to true. The idea is to update Spark K8s backend structures but leave the resources around. The assumption is that since entries are not removed from the `removedExecutorsCache` we are immune to updates that refer to the the executor resources previously removed. The only delete operation not touched is the one in the `doKillExecutors` method. Reason is right now we dont support [blacklisting](https://issues.apache.org/jira/browse/SPARK-23485) and dynamic allocation with Spark on K8s. 
In both cases in the future we might want to handle these scenarios although its more complicated. More tests can be added if approach is approved. ## How was this patch tested? Manually by running a Spark job and verifying pods are not deleted. Closes #23136 from skonto/keep_pods. Authored-by: Stavros Kontopoulos Signed-off-by: Yinan Li --- docs/running-on-kubernetes.md | 7 +++++++ .../org/apache/spark/deploy/k8s/Config.scala | 7 +++++++ .../cluster/k8s/ExecutorPodsAllocator.scala | 15 ++++++++++----- .../k8s/ExecutorPodsLifecycleManager.scala | 8 ++++++-- .../k8s/KubernetesClusterSchedulerBackend.scala | 15 ++++++++++----- .../k8s/ExecutorPodsLifecycleManagerSuite.scala | 14 +++++++++++++- 6 files changed, 53 insertions(+), 13 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5639253d52f5..3172b1bca8f0 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -944,6 +944,13 @@ specific to Spark on Kubernetes. spark.kubernetes.executor.podTemplateFile=/path/to/executor-pod-template.yaml` + + + + +
    spark.kubernetes.executor.deleteOnTerminationtrue + Specify whether executor pods should be deleted in case of failure or normal termination. +
    #### Pod template properties diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 1abf2901268f..e8bf16df190e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -282,6 +282,13 @@ private[spark] object Config extends Logging { val KUBERNETES_NODE_SELECTOR_PREFIX = "spark.kubernetes.node.selector." + val KUBERNETES_DELETE_EXECUTORS = + ConfigBuilder("spark.kubernetes.executor.deleteOnTermination") + .doc("If set to false then executor pods will not be deleted in case " + + "of failure or normal termination.") + .booleanConf + .createWithDefault(true) + val KUBERNETES_DRIVER_LABEL_PREFIX = "spark.kubernetes.driver.label." val KUBERNETES_DRIVER_ANNOTATION_PREFIX = "spark.kubernetes.driver.annotation." val KUBERNETES_DRIVER_SECRETS_PREFIX = "spark.kubernetes.driver.secrets." diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index 77bb9c3fcc9f..ef4cbdf162c6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -51,6 +51,8 @@ private[spark] class ExecutorPodsAllocator( private val kubernetesDriverPodName = conf .get(KUBERNETES_DRIVER_POD_NAME) + private val shouldDeleteExecutors = conf.get(KUBERNETES_DELETE_EXECUTORS) + private val driverPod = kubernetesDriverPodName .map(name => Option(kubernetesClient.pods() .withName(name) @@ -86,11 +88,14 @@ private[spark] class ExecutorPodsAllocator( s" cluster after $podCreationTimeout milliseconds despite the fact that a" + " previous allocation attempt tried to create it. The executor may have been" + " deleted but the application missed the deletion event.") - Utils.tryLogNonFatalError { - kubernetesClient - .pods() - .withLabel(SPARK_EXECUTOR_ID_LABEL, execId.toString) - .delete() + + if (shouldDeleteExecutors) { + Utils.tryLogNonFatalError { + kubernetesClient + .pods() + .withLabel(SPARK_EXECUTOR_ID_LABEL, execId.toString) + .delete() + } } newlyCreatedExecutors -= execId } else { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala index 77a1d6cfae3b..95e1ba8362a0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala @@ -30,7 +30,7 @@ import org.apache.spark.scheduler.ExecutorExited import org.apache.spark.util.Utils private[spark] class ExecutorPodsLifecycleManager( - conf: SparkConf, + val conf: SparkConf, kubernetesClient: KubernetesClient, snapshotsStore: ExecutorPodsSnapshotsStore, // Use a best-effort to track which executors have been removed already. 
It's not generally @@ -43,6 +43,8 @@ private[spark] class ExecutorPodsLifecycleManager( private val eventProcessingInterval = conf.get(KUBERNETES_EXECUTOR_EVENT_PROCESSING_INTERVAL) + private lazy val shouldDeleteExecutors = conf.get(KUBERNETES_DELETE_EXECUTORS) + def start(schedulerBackend: KubernetesClusterSchedulerBackend): Unit = { snapshotsStore.addSubscriber(eventProcessingInterval) { onNewSnapshots(schedulerBackend, _) @@ -112,7 +114,9 @@ private[spark] class ExecutorPodsLifecycleManager( schedulerBackend: KubernetesClusterSchedulerBackend, execIdsRemovedInRound: mutable.Set[Long]): Unit = { removeExecutorFromSpark(schedulerBackend, podState, execId) - removeExecutorFromK8s(podState.pod) + if (shouldDeleteExecutors) { + removeExecutorFromK8s(podState.pod) + } execIdsRemovedInRound += execId } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index fa6dc2c479bb..6356b5864580 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -21,6 +21,7 @@ import java.util.concurrent.ExecutorService import io.fabric8.kubernetes.client.KubernetesClient import scala.concurrent.{ExecutionContext, Future} +import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.rpc.{RpcAddress, RpcEnv} import org.apache.spark.scheduler.{ExecutorLossReason, TaskSchedulerImpl} @@ -51,6 +52,8 @@ private[spark] class KubernetesClusterSchedulerBackend( private val initialExecutors = SchedulerBackendUtils.getInitialTargetExecutorNumber(conf) + private val shouldDeleteExecutors = conf.get(KUBERNETES_DELETE_EXECUTORS) + // Allow removeExecutor to be accessible by ExecutorPodsLifecycleEventHandler private[k8s] def doRemoveExecutor(executorId: String, reason: ExecutorLossReason): Unit = { removeExecutor(executorId, reason) @@ -82,11 +85,13 @@ private[spark] class KubernetesClusterSchedulerBackend( pollEvents.stop() } - Utils.tryLogNonFatalError { - kubernetesClient.pods() - .withLabel(SPARK_APP_ID_LABEL, applicationId()) - .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE) - .delete() + if (shouldDeleteExecutors) { + Utils.tryLogNonFatalError { + kubernetesClient.pods() + .withLabel(SPARK_APP_ID_LABEL, applicationId()) + .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE) + .delete() + } } Utils.tryLogNonFatalError { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManagerSuite.scala index 3995b2afe7c4..7411f8f9d69e 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManagerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManagerSuite.scala @@ -22,7 +22,7 @@ import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.PodResource import org.mockito.{Mock, MockitoAnnotations} import org.mockito.Matchers.any -import org.mockito.Mockito.{mock, times, 
verify, when} +import org.mockito.Mockito.{mock, never, times, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter @@ -30,6 +30,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s.Config import org.apache.spark.deploy.k8s.Fabric8Aliases._ import org.apache.spark.deploy.k8s.KubernetesUtils._ import org.apache.spark.scheduler.ExecutorExited @@ -100,6 +101,17 @@ class ExecutorPodsLifecycleManagerSuite extends SparkFunSuite with BeforeAndAfte verify(schedulerBackend).doRemoveExecutor("1", expectedLossReason) } + test("Keep executor pods in k8s if configured.") { + val failedPod = failedExecutorWithoutDeletion(1) + eventHandlerUnderTest.conf.set(Config.KUBERNETES_DELETE_EXECUTORS, false) + snapshotsStore.updatePod(failedPod) + snapshotsStore.notifySubscribers() + val msg = exitReasonMessage(1, failedPod) + val expectedLossReason = ExecutorExited(1, exitCausedByApp = true, msg) + verify(schedulerBackend).doRemoveExecutor("1", expectedLossReason) + verify(podOperations, never()).delete() + } + private def exitReasonMessage(failedExecutorId: Int, failedPod: Pod): String = { val reason = Option(failedPod.getStatus.getReason) val message = Option(failedPod.getStatus.getMessage) From 187bb7d008872e812aaa6590c89121bfa50e97d3 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Mon, 3 Dec 2018 13:54:09 -0800 Subject: [PATCH 0059/1072] [SPARK-25957][FOLLOWUP] Build python docker image in sbt build too. docker-image-tool.sh requires explicit argument to create the python image now; do that from the sbt integration tests target too. Closes #23172 from vanzin/SPARK-25957.followup. Authored-by: Marcelo Vanzin Signed-off-by: Marcelo Vanzin --- project/SparkBuild.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index bb834bc483f1..a0946a9ad665 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -494,7 +494,13 @@ object KubernetesIntegrationTests { dockerBuild := { if (shouldBuildImage) { val dockerTool = s"$sparkHome/bin/docker-image-tool.sh" - val cmd = Seq(dockerTool, "-m", "-t", imageTag.value, "build") + val bindingsDir = s"$sparkHome/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings" + val cmd = Seq(dockerTool, "-m", + "-t", imageTag.value, + "-p", s"$bindingsDir/python/Dockerfile", + "-R", s"$bindingsDir/R/Dockerfile", + "build" + ) val ec = Process(cmd).! if (ec != 0) { throw new IllegalStateException(s"Process '${cmd.mkString(" ")}' exited with $ec.") From 518a3d10c87bb6d7d442eba7265fc026aa54473e Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 3 Dec 2018 14:03:10 -0800 Subject: [PATCH 0060/1072] [SPARK-26033][SPARK-26034][PYTHON][FOLLOW-UP] Small cleanup and deduplication in ml/mllib tests ## What changes were proposed in this pull request? This PR is a small follow up that puts some logic and functions into smaller scope and make it localized, and deduplicate. ## How was this patch tested? Manually tested. Jenkins tests as well. Closes #23200 from HyukjinKwon/followup-SPARK-26034-SPARK-26033. 
Authored-by: Hyukjin Kwon Signed-off-by: Bryan Cutler --- python/pyspark/ml/tests/test_linalg.py | 44 +++++++------ python/pyspark/mllib/tests/test_algorithms.py | 8 +-- python/pyspark/mllib/tests/test_linalg.py | 62 ++++++++----------- python/pyspark/testing/mllibutils.py | 5 -- 4 files changed, 51 insertions(+), 68 deletions(-) diff --git a/python/pyspark/ml/tests/test_linalg.py b/python/pyspark/ml/tests/test_linalg.py index 71cad5d7f5ad..995bc35e4ca8 100644 --- a/python/pyspark/ml/tests/test_linalg.py +++ b/python/pyspark/ml/tests/test_linalg.py @@ -20,25 +20,17 @@ from numpy import arange, array, array_equal, inf, ones, tile, zeros +from pyspark.serializers import PickleSerializer from pyspark.ml.linalg import DenseMatrix, DenseVector, MatrixUDT, SparseMatrix, SparseVector, \ Vector, VectorUDT, Vectors -from pyspark.testing.mllibutils import make_serializer, MLlibTestCase +from pyspark.testing.mllibutils import MLlibTestCase from pyspark.sql import Row -ser = make_serializer() - - -def _squared_distance(a, b): - if isinstance(a, Vector): - return a.squared_distance(b) - else: - return b.squared_distance(a) - - class VectorTests(MLlibTestCase): def _test_serialize(self, v): + ser = PickleSerializer() self.assertEqual(v, ser.loads(ser.dumps(v))) jvec = self.sc._jvm.org.apache.spark.ml.python.MLSerDe.loads(bytearray(ser.dumps(v))) nv = ser.loads(bytes(self.sc._jvm.org.apache.spark.ml.python.MLSerDe.dumps(jvec))) @@ -77,24 +69,30 @@ def test_dot(self): self.assertEqual(7.0, sv.dot(arr)) def test_squared_distance(self): + def squared_distance(a, b): + if isinstance(a, Vector): + return a.squared_distance(b) + else: + return b.squared_distance(a) + sv = SparseVector(4, {1: 1, 3: 2}) dv = DenseVector(array([1., 2., 3., 4.])) lst = DenseVector([4, 3, 2, 1]) lst1 = [4, 3, 2, 1] arr = pyarray.array('d', [0, 2, 1, 3]) narr = array([0, 2, 1, 3]) - self.assertEqual(15.0, _squared_distance(sv, dv)) - self.assertEqual(25.0, _squared_distance(sv, lst)) - self.assertEqual(20.0, _squared_distance(dv, lst)) - self.assertEqual(15.0, _squared_distance(dv, sv)) - self.assertEqual(25.0, _squared_distance(lst, sv)) - self.assertEqual(20.0, _squared_distance(lst, dv)) - self.assertEqual(0.0, _squared_distance(sv, sv)) - self.assertEqual(0.0, _squared_distance(dv, dv)) - self.assertEqual(0.0, _squared_distance(lst, lst)) - self.assertEqual(25.0, _squared_distance(sv, lst1)) - self.assertEqual(3.0, _squared_distance(sv, arr)) - self.assertEqual(3.0, _squared_distance(sv, narr)) + self.assertEqual(15.0, squared_distance(sv, dv)) + self.assertEqual(25.0, squared_distance(sv, lst)) + self.assertEqual(20.0, squared_distance(dv, lst)) + self.assertEqual(15.0, squared_distance(dv, sv)) + self.assertEqual(25.0, squared_distance(lst, sv)) + self.assertEqual(20.0, squared_distance(lst, dv)) + self.assertEqual(0.0, squared_distance(sv, sv)) + self.assertEqual(0.0, squared_distance(dv, dv)) + self.assertEqual(0.0, squared_distance(lst, lst)) + self.assertEqual(25.0, squared_distance(sv, lst1)) + self.assertEqual(3.0, squared_distance(sv, arr)) + self.assertEqual(3.0, squared_distance(sv, narr)) def test_hash(self): v1 = DenseVector([0.0, 1.0, 0.0, 5.5]) diff --git a/python/pyspark/mllib/tests/test_algorithms.py b/python/pyspark/mllib/tests/test_algorithms.py index cc3b64b1cb28..21a2d64087bc 100644 --- a/python/pyspark/mllib/tests/test_algorithms.py +++ b/python/pyspark/mllib/tests/test_algorithms.py @@ -26,10 +26,8 @@ from pyspark.mllib.fpm import FPGrowth from pyspark.mllib.recommendation import Rating from 
pyspark.mllib.regression import LabeledPoint -from pyspark.testing.mllibutils import make_serializer, MLlibTestCase - - -ser = make_serializer() +from pyspark.serializers import PickleSerializer +from pyspark.testing.mllibutils import MLlibTestCase class ListTests(MLlibTestCase): @@ -265,6 +263,7 @@ def test_regression(self): class ALSTests(MLlibTestCase): def test_als_ratings_serialize(self): + ser = PickleSerializer() r = Rating(7, 1123, 3.14) jr = self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.loads(bytearray(ser.dumps(r))) nr = ser.loads(bytes(self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.dumps(jr))) @@ -273,6 +272,7 @@ def test_als_ratings_serialize(self): self.assertAlmostEqual(r.rating, nr.rating, 2) def test_als_ratings_id_long_error(self): + ser = PickleSerializer() r = Rating(1205640308657491975, 50233468418, 1.0) # rating user id exceeds max int value, should fail when pickled self.assertRaises(Py4JJavaError, self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.loads, diff --git a/python/pyspark/mllib/tests/test_linalg.py b/python/pyspark/mllib/tests/test_linalg.py index d0ebd9bc3db7..f26e28d1744d 100644 --- a/python/pyspark/mllib/tests/test_linalg.py +++ b/python/pyspark/mllib/tests/test_linalg.py @@ -22,33 +22,18 @@ from numpy import array, array_equal, zeros, arange, tile, ones, inf import pyspark.ml.linalg as newlinalg +from pyspark.serializers import PickleSerializer from pyspark.mllib.linalg import Vector, SparseVector, DenseVector, VectorUDT, _convert_to_vector, \ DenseMatrix, SparseMatrix, Vectors, Matrices, MatrixUDT from pyspark.mllib.regression import LabeledPoint -from pyspark.testing.mllibutils import make_serializer, MLlibTestCase - -_have_scipy = False -try: - import scipy.sparse - _have_scipy = True -except: - # No SciPy, but that's okay, we'll skip those tests - pass - - -ser = make_serializer() - - -def _squared_distance(a, b): - if isinstance(a, Vector): - return a.squared_distance(b) - else: - return b.squared_distance(a) +from pyspark.testing.mllibutils import MLlibTestCase +from pyspark.testing.utils import have_scipy class VectorTests(MLlibTestCase): def _test_serialize(self, v): + ser = PickleSerializer() self.assertEqual(v, ser.loads(ser.dumps(v))) jvec = self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.loads(bytearray(ser.dumps(v))) nv = ser.loads(bytes(self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.dumps(jvec))) @@ -87,24 +72,30 @@ def test_dot(self): self.assertEqual(7.0, sv.dot(arr)) def test_squared_distance(self): + def squared_distance(a, b): + if isinstance(a, Vector): + return a.squared_distance(b) + else: + return b.squared_distance(a) + sv = SparseVector(4, {1: 1, 3: 2}) dv = DenseVector(array([1., 2., 3., 4.])) lst = DenseVector([4, 3, 2, 1]) lst1 = [4, 3, 2, 1] arr = pyarray.array('d', [0, 2, 1, 3]) narr = array([0, 2, 1, 3]) - self.assertEqual(15.0, _squared_distance(sv, dv)) - self.assertEqual(25.0, _squared_distance(sv, lst)) - self.assertEqual(20.0, _squared_distance(dv, lst)) - self.assertEqual(15.0, _squared_distance(dv, sv)) - self.assertEqual(25.0, _squared_distance(lst, sv)) - self.assertEqual(20.0, _squared_distance(lst, dv)) - self.assertEqual(0.0, _squared_distance(sv, sv)) - self.assertEqual(0.0, _squared_distance(dv, dv)) - self.assertEqual(0.0, _squared_distance(lst, lst)) - self.assertEqual(25.0, _squared_distance(sv, lst1)) - self.assertEqual(3.0, _squared_distance(sv, arr)) - self.assertEqual(3.0, _squared_distance(sv, narr)) + self.assertEqual(15.0, squared_distance(sv, dv)) + self.assertEqual(25.0, 
squared_distance(sv, lst)) + self.assertEqual(20.0, squared_distance(dv, lst)) + self.assertEqual(15.0, squared_distance(dv, sv)) + self.assertEqual(25.0, squared_distance(lst, sv)) + self.assertEqual(20.0, squared_distance(lst, dv)) + self.assertEqual(0.0, squared_distance(sv, sv)) + self.assertEqual(0.0, squared_distance(dv, dv)) + self.assertEqual(0.0, squared_distance(lst, lst)) + self.assertEqual(25.0, squared_distance(sv, lst1)) + self.assertEqual(3.0, squared_distance(sv, arr)) + self.assertEqual(3.0, squared_distance(sv, narr)) def test_hash(self): v1 = DenseVector([0.0, 1.0, 0.0, 5.5]) @@ -466,7 +457,7 @@ def test_infer_schema(self): raise ValueError("Expected a matrix but got type %r" % type(m)) -@unittest.skipIf(not _have_scipy, "SciPy not installed") +@unittest.skipIf(not have_scipy, "SciPy not installed") class SciPyTests(MLlibTestCase): """ @@ -476,6 +467,8 @@ class SciPyTests(MLlibTestCase): def test_serialize(self): from scipy.sparse import lil_matrix + + ser = PickleSerializer() lil = lil_matrix((4, 1)) lil[1, 0] = 1 lil[3, 0] = 2 @@ -621,13 +614,10 @@ def test_regression(self): if __name__ == "__main__": from pyspark.mllib.tests.test_linalg import * - if not _have_scipy: - print("NOTE: Skipping SciPy tests as it does not seem to be installed") + try: import xmlrunner testRunner = xmlrunner.XMLTestRunner(output='target/test-reports') except ImportError: testRunner = None unittest.main(testRunner=testRunner, verbosity=2) - if not _have_scipy: - print("NOTE: SciPy tests were skipped as it does not seem to be installed") diff --git a/python/pyspark/testing/mllibutils.py b/python/pyspark/testing/mllibutils.py index 25f1bba8d37a..c09fb50482e4 100644 --- a/python/pyspark/testing/mllibutils.py +++ b/python/pyspark/testing/mllibutils.py @@ -18,14 +18,9 @@ import unittest from pyspark import SparkContext -from pyspark.serializers import PickleSerializer from pyspark.sql import SparkSession -def make_serializer(): - return PickleSerializer() - - class MLlibTestCase(unittest.TestCase): def setUp(self): self.sc = SparkContext('local[4]', "MLlib tests") From a24e1a126c55fc06f5867c0e5e5b0ee71201e018 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Mon, 3 Dec 2018 14:57:18 -0800 Subject: [PATCH 0061/1072] [SPARK-26256][K8S] Fix labels for pod deletion ## What changes were proposed in this pull request? Adds proper labels when deleting executor pods. ## How was this patch tested? Manually with tests. Closes #23209 from skonto/fix-deletion-labels. 
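(Editorial note, an illustrative sketch rather than the patch itself: the fix scopes pod deletion by application id and pod role in addition to the executor id, so one Spark application can no longer delete another application's pods that happen to share an executor-id label. The label keys and values below are assumptions written out for readability; the real code uses the constants from Spark's k8s `Constants.scala`.)

```scala
import io.fabric8.kubernetes.client.KubernetesClient

object ExecutorPodCleanupSketch {
  // Illustrative label keys/values; Spark defines these in Constants.scala.
  private val SparkAppIdLabel = "spark-app-selector"
  private val SparkRoleLabel = "spark-role"
  private val SparkPodExecutorRole = "executor"
  private val SparkExecutorIdLabel = "spark-exec-id"

  def deleteExecutorPod(client: KubernetesClient, appId: String, execId: Long): Unit = {
    client.pods()
      .withLabel(SparkAppIdLabel, appId)                 // scope to this application
      .withLabel(SparkRoleLabel, SparkPodExecutorRole)   // only executor pods
      .withLabel(SparkExecutorIdLabel, execId.toString)  // the specific executor
      .delete()
  }
}
```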
Authored-by: Stavros Kontopoulos Signed-off-by: Marcelo Vanzin --- .../scheduler/cluster/k8s/ExecutorPodsAllocator.scala | 2 ++ .../cluster/k8s/ExecutorPodsAllocatorSuite.scala | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index ef4cbdf162c6..2f0f949566d6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -93,6 +93,8 @@ private[spark] class ExecutorPodsAllocator( Utils.tryLogNonFatalError { kubernetesClient .pods() + .withLabel(SPARK_APP_ID_LABEL, applicationId) + .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE) .withLabel(SPARK_EXECUTOR_ID_LABEL, execId.toString) .delete() } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index ddf9f67a0727..303e24b8f497 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -138,7 +138,15 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { snapshotsStore.notifySubscribers() snapshotsStore.replaceSnapshot(Seq.empty[Pod]) waitForExecutorPodsClock.setTime(podCreationTimeout + 1) - when(podOperations.withLabel(SPARK_EXECUTOR_ID_LABEL, "1")).thenReturn(labeledPods) + when(podOperations + .withLabel(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID)) + .thenReturn(podOperations) + when(podOperations + withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)) + .thenReturn(podOperations) + when(podOperations + .withLabel(SPARK_EXECUTOR_ID_LABEL, "1")) + .thenReturn(labeledPods) snapshotsStore.notifySubscribers() verify(labeledPods).delete() verify(podOperations).create(podWithAttachedContainerForId(2)) From 0889fbaf959e25ebb79e691692a02a93962727d0 Mon Sep 17 00:00:00 2001 From: Qi Shao Date: Mon, 3 Dec 2018 15:36:41 -0800 Subject: [PATCH 0062/1072] [SPARK-26083][K8S] Add Copy pyspark into corresponding dir cmd in pyspark Dockerfile When I try to run `./bin/pyspark` cmd in a pod in Kubernetes(image built without change from pyspark Dockerfile), I'm getting an error: ``` $SPARK_HOME/bin/pyspark --deploy-mode client --master k8s://https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT_HTTPS ... Python 2.7.15 (default, Aug 22 2018, 13:24:18) [GCC 6.4.0] on linux2 Type "help", "copyright", "credits" or "license" for more information. Could not open PYTHONSTARTUP IOError: [Errno 2] No such file or directory: '/opt/spark/python/pyspark/shell.py' ``` This is because `pyspark` folder doesn't exist under `/opt/spark/python/` ## What changes were proposed in this pull request? Added `COPY python/pyspark ${SPARK_HOME}/python/pyspark` to pyspark Dockerfile to resolve issue above. ## How was this patch tested? Google Kubernetes Engine Closes #23037 from AzureQ/master. 
Authored-by: Qi Shao Signed-off-by: Marcelo Vanzin --- bin/docker-image-tool.sh | 1 + .../docker/src/main/dockerfiles/spark/bindings/python/Dockerfile | 1 + 2 files changed, 2 insertions(+) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index fbf9c9e448fd..4f66137eb1c7 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -107,6 +107,7 @@ function create_dev_build_context {( "$PYSPARK_CTX/kubernetes/dockerfiles" mkdir "$PYSPARK_CTX/python" cp -r "python/lib" "$PYSPARK_CTX/python/lib" + cp -r "python/pyspark" "$PYSPARK_CTX/python/pyspark" local R_CTX="$CTX_DIR/sparkr" mkdir -p "$R_CTX/kubernetes" diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile index de1a0617b1cc..36b91eb9a3aa 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile @@ -38,6 +38,7 @@ RUN apk add --no-cache python && \ # Removed the .cache to save space rm -r /root/.cache +COPY python/pyspark ${SPARK_HOME}/python/pyspark COPY python/lib ${SPARK_HOME}/python/lib ENV PYTHONPATH ${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4j-*.zip From f7af4a1965b1052d3c77505ab1b660a294757bed Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Tue, 4 Dec 2018 12:14:38 +0800 Subject: [PATCH 0063/1072] [SPARK-25498][SQL][FOLLOW-UP] Return an empty config set when regenerating the golden files ## What changes were proposed in this pull request? This pr is to return an empty config set when regenerating the golden files in `SQLQueryTestSuite`. This is the follow-up of #22512. ## How was this patch tested? N/A Closes #23212 from maropu/SPARK-25498-FOLLOWUP. Authored-by: Takeshi Yamamuro Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index fd180ce2380a..cf4585bf7ac6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -154,14 +154,10 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { val (conf, value) = confAndValue.span(_ != '=') conf.trim -> value.substring(1).trim }) - // When we are regenerating the golden files we don't need to run all the configs as they + // When we are regenerating the golden files, we don't need to set any config as they // all need to return the same result if (regenerateGoldenFiles) { - if (configs.nonEmpty) { - configs.take(1) - } else { - Array.empty[Array[(String, String)]] - } + Array.empty[Array[(String, String)]] } else { if (configs.nonEmpty) { codegenConfigSets.flatMap { codegenConfig => From b4dea313c45042e4094d14ebdeb8ad27be4cc695 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Mon, 3 Dec 2018 23:00:02 -0800 Subject: [PATCH 0064/1072] [SPARK-25573] Combine resolveExpression and resolve in the Analyzer ## What changes were proposed in this pull request? Currently in the Analyzer, we have two methods 1) Resolve 2)ResolveExpressions that are called at different code paths to resolve attributes, column ordinal and extract value expressions. 
~~In this PR, we combine the two into one method to make sure, there is only one method that is tasked with resolving the attributes.~~ Update the description of the methods and use better names to make it easier to know when to make use of one method vs the other. ## How was this patch tested? Existing tests. Closes #22899 from dilipbiswal/SPARK-25573-final. Authored-by: Dilip Biswal Signed-off-by: gatorsmile --- .../sql/catalyst/analysis/Analyzer.scala | 97 +++++++++++++------ 1 file changed, 66 insertions(+), 31 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b977fa07db5c..777053168a05 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -883,21 +883,38 @@ class Analyzer( } } - private def resolve(e: Expression, q: LogicalPlan): Expression = e match { - case f: LambdaFunction if !f.bound => f - case u @ UnresolvedAttribute(nameParts) => - // Leave unchanged if resolution fails. Hopefully will be resolved next round. - val result = - withPosition(u) { - q.resolveChildren(nameParts, resolver) - .orElse(resolveLiteralFunction(nameParts, u, q)) - .getOrElse(u) - } - logDebug(s"Resolving $u to $result") - result - case UnresolvedExtractValue(child, fieldExpr) if child.resolved => - ExtractValue(child, fieldExpr, resolver) - case _ => e.mapChildren(resolve(_, q)) + /** + * Resolves the attribute and extract value expressions(s) by traversing the + * input expression in top down manner. The traversal is done in top-down manner as + * we need to skip over unbound lamda function expression. The lamda expressions are + * resolved in a different rule [[ResolveLambdaVariables]] + * + * Example : + * SELECT transform(array(1, 2, 3), (x, i) -> x + i)" + * + * In the case above, x and i are resolved as lamda variables in [[ResolveLambdaVariables]] + * + * Note : In this routine, the unresolved attributes are resolved from the input plan's + * children attributes. + */ + private def resolveExpressionTopDown(e: Expression, q: LogicalPlan): Expression = { + if (e.resolved) return e + e match { + case f: LambdaFunction if !f.bound => f + case u @ UnresolvedAttribute(nameParts) => + // Leave unchanged if resolution fails. Hopefully will be resolved next round. 
+ val result = + withPosition(u) { + q.resolveChildren(nameParts, resolver) + .orElse(resolveLiteralFunction(nameParts, u, q)) + .getOrElse(u) + } + logDebug(s"Resolving $u to $result") + result + case UnresolvedExtractValue(child, fieldExpr) if child.resolved => + ExtractValue(child, fieldExpr, resolver) + case _ => e.mapChildren(resolveExpressionTopDown(_, q)) + } } def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { @@ -936,7 +953,7 @@ class Analyzer( // we still have chance to resolve it based on its descendants case s @ Sort(ordering, global, child) if child.resolved && !s.resolved => val newOrdering = - ordering.map(order => resolveExpression(order, child).asInstanceOf[SortOrder]) + ordering.map(order => resolveExpressionBottomUp(order, child).asInstanceOf[SortOrder]) Sort(newOrdering, global, child) // A special case for Generate, because the output of Generate should not be resolved by @@ -944,7 +961,7 @@ class Analyzer( case g @ Generate(generator, _, _, _, _, _) if generator.resolved => g case g @ Generate(generator, join, outer, qualifier, output, child) => - val newG = resolveExpression(generator, child, throws = true) + val newG = resolveExpressionBottomUp(generator, child, throws = true) if (newG.fastEquals(generator)) { g } else { @@ -959,11 +976,11 @@ class Analyzer( // `AppendColumns`, because `AppendColumns`'s serializer might produce conflict attribute // names leading to ambiguous references exception. case a @ Aggregate(groupingExprs, aggExprs, appendColumns: AppendColumns) => - a.mapExpressions(resolve(_, appendColumns)) + a.mapExpressions(resolveExpressionTopDown(_, appendColumns)) case q: LogicalPlan => logTrace(s"Attempting to resolve ${q.simpleString}") - q.mapExpressions(resolve(_, q)) + q.mapExpressions(resolveExpressionTopDown(_, q)) } def newAliases(expressions: Seq[NamedExpression]): Seq[NamedExpression] = { @@ -1060,7 +1077,22 @@ class Analyzer( func.map(wrapper) } - protected[sql] def resolveExpression( + /** + * Resolves the attribute, column value and extract value expressions(s) by traversing the + * input expression in bottom-up manner. In order to resolve the nested complex type fields + * correctly, this function makes use of `throws` parameter to control when to raise an + * AnalysisException. + * + * Example : + * SELECT a.b FROM t ORDER BY b[0].d + * + * In the above example, in b needs to be resolved before d can be resolved. Given we are + * doing a bottom up traversal, it will first attempt to resolve d and fail as b has not + * been resolved yet. If `throws` is false, this function will handle the exception by + * returning the original attribute. In this case `d` will be resolved in subsequent passes + * after `b` is resolved. 
+ */ + protected[sql] def resolveExpressionBottomUp( expr: Expression, plan: LogicalPlan, throws: Boolean = false): Expression = { @@ -1073,11 +1105,14 @@ class Analyzer( expr transformUp { case GetColumnByOrdinal(ordinal, _) => plan.output(ordinal) case u @ UnresolvedAttribute(nameParts) => - withPosition(u) { - plan.resolve(nameParts, resolver) - .orElse(resolveLiteralFunction(nameParts, u, plan)) - .getOrElse(u) - } + val result = + withPosition(u) { + plan.resolve(nameParts, resolver) + .orElse(resolveLiteralFunction(nameParts, u, plan)) + .getOrElse(u) + } + logDebug(s"Resolving $u to $result") + result case UnresolvedExtractValue(child, fieldName) if child.resolved => ExtractValue(child, fieldName, resolver) } @@ -1223,7 +1258,7 @@ class Analyzer( plan match { case p: Project => // Resolving expressions against current plan. - val maybeResolvedExprs = exprs.map(resolveExpression(_, p)) + val maybeResolvedExprs = exprs.map(resolveExpressionBottomUp(_, p)) // Recursively resolving expressions on the child of current plan. val (newExprs, newChild) = resolveExprsAndAddMissingAttrs(maybeResolvedExprs, p.child) // If some attributes used by expressions are resolvable only on the rewritten child @@ -1232,7 +1267,7 @@ class Analyzer( (newExprs, Project(p.projectList ++ missingAttrs, newChild)) case a @ Aggregate(groupExprs, aggExprs, child) => - val maybeResolvedExprs = exprs.map(resolveExpression(_, a)) + val maybeResolvedExprs = exprs.map(resolveExpressionBottomUp(_, a)) val (newExprs, newChild) = resolveExprsAndAddMissingAttrs(maybeResolvedExprs, child) val missingAttrs = (AttributeSet(newExprs) -- a.outputSet).intersect(newChild.outputSet) if (missingAttrs.forall(attr => groupExprs.exists(_.semanticEquals(attr)))) { @@ -1244,20 +1279,20 @@ class Analyzer( } case g: Generate => - val maybeResolvedExprs = exprs.map(resolveExpression(_, g)) + val maybeResolvedExprs = exprs.map(resolveExpressionBottomUp(_, g)) val (newExprs, newChild) = resolveExprsAndAddMissingAttrs(maybeResolvedExprs, g.child) (newExprs, g.copy(unrequiredChildIndex = Nil, child = newChild)) // For `Distinct` and `SubqueryAlias`, we can't recursively resolve and add attributes // via its children. case u: UnaryNode if !u.isInstanceOf[Distinct] && !u.isInstanceOf[SubqueryAlias] => - val maybeResolvedExprs = exprs.map(resolveExpression(_, u)) + val maybeResolvedExprs = exprs.map(resolveExpressionBottomUp(_, u)) val (newExprs, newChild) = resolveExprsAndAddMissingAttrs(maybeResolvedExprs, u.child) (newExprs, u.withNewChildren(Seq(newChild))) // For other operators, we can't recursively resolve and add attributes via its children. case other => - (exprs.map(resolveExpression(_, other)), other) + (exprs.map(resolveExpressionBottomUp(_, other)), other) } } } @@ -2387,7 +2422,7 @@ class Analyzer( } validateTopLevelTupleFields(deserializer, inputs) - val resolved = resolveExpression( + val resolved = resolveExpressionBottomUp( deserializer, LocalRelation(inputs), throws = true) val result = resolved transformDown { case UnresolvedMapObjects(func, inputData, cls) if inputData.resolved => From 26128484228089c74517cd15cef0bb4166a4186f Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Tue, 4 Dec 2018 20:20:29 +0800 Subject: [PATCH 0065/1072] [SPARK-25374][SQL] SafeProjection supports fallback to an interpreted mode ## What changes were proposed in this pull request? In SPARK-23711, we have implemented the expression fallback logic to an interpreted mode. 
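(Editorial aside, not part of the original commit message: the sketch below, modeled on the round-trip test this patch adds to `UnsafeRowConverterSuite`, shows the conversion that `SafeProjection.create` performs; it uses Spark's internal Catalyst APIs, so treat the exact imports and types as assumptions.)

```scala
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{SafeProjection, UnsafeProjection}
import org.apache.spark.sql.types.{DataType, IntegerType, StringType}
import org.apache.spark.unsafe.types.UTF8String

object SafeProjectionRoundTripSketch {
  def main(args: Array[String]): Unit = {
    val fields: Array[DataType] = Array(IntegerType, StringType)
    val inputRow = InternalRow.fromSeq(Seq(1, UTF8String.fromString("a")))

    // Project into the binary UnsafeRow format, then back into a safe
    // (GenericInternalRow-backed) representation. With this change,
    // SafeProjection.create tries generated code first and falls back to
    // the interpreted implementation when codegen fails.
    val unsafeRow = UnsafeProjection.create(fields)(inputRow)
    val safeRow = SafeProjection.create(fields)(unsafeRow)
    assert(safeRow.getInt(0) == 1)
    assert(safeRow.getUTF8String(1) == UTF8String.fromString("a"))
  }
}
```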
So, this pr fixed code to support the same fallback mode in `SafeProjection` based on `CodeGeneratorWithInterpretedFallback`. ## How was this patch tested? Add tests in `CodeGeneratorWithInterpretedFallbackSuite` and `UnsafeRowConverterSuite`. Closes #22468 from maropu/SPARK-25374-3. Authored-by: Takeshi Yamamuro Signed-off-by: Wenchen Fan --- .../catalyst/encoders/ExpressionEncoder.scala | 2 +- .../InterpretedSafeProjection.scala | 125 ++++++++++++++++++ .../sql/catalyst/expressions/Projection.scala | 34 +++-- .../expressions/CodeGenerationSuite.scala | 2 +- ...eneratorWithInterpretedFallbackSuite.scala | 15 +++ .../expressions/ExpressionEvalHelper.scala | 4 +- .../expressions/MutableProjectionSuite.scala | 2 +- .../expressions/UnsafeRowConverterSuite.scala | 89 ++++++++++++- .../DeclarativeAggregateEvaluator.scala | 11 +- .../codegen/GeneratedProjectionSuite.scala | 8 +- .../util/ArrayDataIndexedSeqSuite.scala | 4 +- .../org/apache/spark/sql/types/TestUDT.scala | 61 +++++++++ .../spark/sql/FileBasedDataSourceSuite.scala | 4 +- .../spark/sql/UserDefinedTypeSuite.scala | 105 +++++---------- .../datasources/json/JsonSuite.scala | 4 +- .../datasources/orc/OrcQuerySuite.scala | 4 +- .../execution/AggregationQuerySuite.scala | 2 +- .../execution/ObjectHashAggregateSuite.scala | 4 +- .../sql/sources/HadoopFsRelationTest.scala | 2 +- 19 files changed, 371 insertions(+), 111 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index 589e215c55e4..fbf0bd68b958 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -302,7 +302,7 @@ case class ExpressionEncoder[T]( private lazy val inputRow = new GenericInternalRow(1) @transient - private lazy val constructProjection = GenerateSafeProjection.generate(deserializer :: Nil) + private lazy val constructProjection = SafeProjection.create(deserializer :: Nil) /** * Returns a new set (with unique ids) of [[NamedExpression]] that represent the serialized form diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala new file mode 100644 index 000000000000..70789dac1d87 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, GenericArrayData, MapData} +import org.apache.spark.sql.types._ + + +/** + * An interpreted version of a safe projection. + * + * @param expressions that produces the resulting fields. These expressions must be bound + * to a schema. + */ +class InterpretedSafeProjection(expressions: Seq[Expression]) extends Projection { + + private[this] val mutableRow = new SpecificInternalRow(expressions.map(_.dataType)) + + private[this] val exprsWithWriters = expressions.zipWithIndex.filter { + case (NoOp, _) => false + case _ => true + }.map { case (e, i) => + val converter = generateSafeValueConverter(e.dataType) + val writer = InternalRow.getWriter(i, e.dataType) + val f = if (!e.nullable) { + (v: Any) => writer(mutableRow, converter(v)) + } else { + (v: Any) => { + if (v == null) { + mutableRow.setNullAt(i) + } else { + writer(mutableRow, converter(v)) + } + } + } + (e, f) + } + + private def generateSafeValueConverter(dt: DataType): Any => Any = dt match { + case ArrayType(elemType, _) => + val elementConverter = generateSafeValueConverter(elemType) + v => { + val arrayValue = v.asInstanceOf[ArrayData] + val result = new Array[Any](arrayValue.numElements()) + arrayValue.foreach(elemType, (i, e) => { + result(i) = elementConverter(e) + }) + new GenericArrayData(result) + } + + case st: StructType => + val fieldTypes = st.fields.map(_.dataType) + val fieldConverters = fieldTypes.map(generateSafeValueConverter) + v => { + val row = v.asInstanceOf[InternalRow] + val ar = new Array[Any](row.numFields) + var idx = 0 + while (idx < row.numFields) { + ar(idx) = fieldConverters(idx)(row.get(idx, fieldTypes(idx))) + idx += 1 + } + new GenericInternalRow(ar) + } + + case MapType(keyType, valueType, _) => + lazy val keyConverter = generateSafeValueConverter(keyType) + lazy val valueConverter = generateSafeValueConverter(valueType) + v => { + val mapValue = v.asInstanceOf[MapData] + val keys = mapValue.keyArray().toArray[Any](keyType) + val values = mapValue.valueArray().toArray[Any](valueType) + val convertedKeys = keys.map(keyConverter) + val convertedValues = values.map(valueConverter) + ArrayBasedMapData(convertedKeys, convertedValues) + } + + case udt: UserDefinedType[_] => + generateSafeValueConverter(udt.sqlType) + + case _ => identity + } + + override def apply(row: InternalRow): InternalRow = { + var i = 0 + while (i < exprsWithWriters.length) { + val (expr, writer) = exprsWithWriters(i) + writer(expr.eval(row)) + i += 1 + } + mutableRow + } +} + +/** + * Helper functions for creating an [[InterpretedSafeProjection]]. + */ +object InterpretedSafeProjection { + + /** + * Returns an [[SafeProjection]] for given sequence of bound Expressions. + */ + def createProjection(exprs: Seq[Expression]): Projection = { + // We need to make sure that we do not reuse stateful expressions. 
+ val cleanedExpressions = exprs.map(_.transform { + case s: Stateful => s.freshCopy() + }) + new InterpretedSafeProjection(cleanedExpressions) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala index 792646cf9f10..b48f7ba655b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala @@ -169,26 +169,40 @@ object UnsafeProjection /** * A projection that could turn UnsafeRow into GenericInternalRow */ -object FromUnsafeProjection { +object SafeProjection extends CodeGeneratorWithInterpretedFallback[Seq[Expression], Projection] { + + override protected def createCodeGeneratedObject(in: Seq[Expression]): Projection = { + GenerateSafeProjection.generate(in) + } + + override protected def createInterpretedObject(in: Seq[Expression]): Projection = { + InterpretedSafeProjection.createProjection(in) + } /** - * Returns a Projection for given StructType. + * Returns a SafeProjection for given StructType. */ - def apply(schema: StructType): Projection = { - apply(schema.fields.map(_.dataType)) + def create(schema: StructType): Projection = create(schema.fields.map(_.dataType)) + + /** + * Returns a SafeProjection for given Array of DataTypes. + */ + def create(fields: Array[DataType]): Projection = { + createObject(fields.zipWithIndex.map(x => new BoundReference(x._2, x._1, true))) } /** - * Returns an UnsafeProjection for given Array of DataTypes. + * Returns a SafeProjection for given sequence of Expressions (bounded). */ - def apply(fields: Seq[DataType]): Projection = { - create(fields.zipWithIndex.map(x => new BoundReference(x._2, x._1, true))) + def create(exprs: Seq[Expression]): Projection = { + createObject(exprs) } /** - * Returns a Projection for given sequence of Expressions (bounded). + * Returns a SafeProjection for given sequence of Expressions, which will be bound to + * `inputSchema`. 
*/ - private def create(exprs: Seq[Expression]): Projection = { - GenerateSafeProjection.generate(exprs) + def create(exprs: Seq[Expression], inputSchema: Seq[Attribute]): Projection = { + create(toBoundExprs(exprs, inputSchema)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 7843003a4aac..7e6fe5b4e206 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -251,7 +251,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { UTF8String.fromString("c")) assert(unsafeRow.getStruct(3, 1).getStruct(0, 2).getInt(1) === 3) - val fromUnsafe = FromUnsafeProjection(schema) + val fromUnsafe = SafeProjection.create(schema) val internalRow2 = fromUnsafe(unsafeRow) assert(internalRow === internalRow2) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallbackSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallbackSuite.scala index 6ea3b05ff9c1..da5bddb0c09f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallbackSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallbackSuite.scala @@ -106,4 +106,19 @@ class CodeGeneratorWithInterpretedFallbackSuite extends SparkFunSuite with PlanT assert(proj(input).toSeq(StructType.fromDDL("c0 int, c1 int")) === expected) } } + + test("SPARK-25374 Correctly handles NoOp in SafeProjection") { + val exprs = Seq(Add(BoundReference(0, IntegerType, nullable = true), Literal.create(1)), NoOp) + val input = InternalRow.fromSeq(1 :: 1 :: Nil) + val expected = 2 :: null :: Nil + withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> codegenOnly) { + val proj = SafeProjection.createObject(exprs) + assert(proj(input).toSeq(StructType.fromDDL("c0 int, c1 int")) === expected) + } + + withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> noCodegen) { + val proj = SafeProjection.createObject(exprs) + assert(proj(input).toSeq(StructType.fromDDL("c0 int, c1 int")) === expected) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index a7282e1b1cad..b4fd170467d8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -321,8 +321,8 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa GenerateUnsafeProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), expression) plan.initialize(0) - actual = FromUnsafeProjection(expression.dataType :: Nil)( - plan(inputRow)).get(0, expression.dataType) + val ref = new BoundReference(0, expression.dataType, nullable = true) + actual = GenerateSafeProjection.generate(ref :: Nil)(plan(inputRow)).get(0, expression.dataType) assert(checkResult(actual, expected, expression)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala index 2db1c3b98819..0d594eb10962 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala @@ -51,7 +51,7 @@ class MutableProjectionSuite extends SparkFunSuite with ExpressionEvalHelper { val unsafeBuffer = UnsafeRow.createFromByteArray(numBytes, fixedLengthTypes.length) val proj = createMutableProjection(fixedLengthTypes) val projUnsafeRow = proj.target(unsafeBuffer)(inputRow) - assert(FromUnsafeProjection.apply(fixedLengthTypes)(projUnsafeRow) === inputRow) + assert(SafeProjection.create(fixedLengthTypes)(projUnsafeRow) === inputRow) } testBothCodegenAndInterpreted("variable-length types") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala index 268372b5d050..ecb8047459b0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTestBase import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.types.{IntegerType, LongType, _} import org.apache.spark.unsafe.array.ByteArrayMethods -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class UnsafeRowConverterSuite extends SparkFunSuite with Matchers with PlanTestBase with ExpressionEvalHelper { @@ -535,4 +535,91 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers with PlanTestB assert(unsafeRow.getSizeInBytes == 8 + 8 * 2 + roundedSize(field1.getSizeInBytes) + roundedSize(field2.getSizeInBytes)) } + + testBothCodegenAndInterpreted("SPARK-25374 converts back into safe representation") { + def convertBackToInternalRow(inputRow: InternalRow, fields: Array[DataType]): InternalRow = { + val unsafeProj = UnsafeProjection.create(fields) + val unsafeRow = unsafeProj(inputRow) + val safeProj = SafeProjection.create(fields) + safeProj(unsafeRow) + } + + // Simple tests + val inputRow = InternalRow.fromSeq(Seq( + false, 3.toByte, 15.toShort, -83, 129L, 1.0f, 8.0, UTF8String.fromString("test"), + Decimal(255), CalendarInterval.fromString("interval 1 day"), Array[Byte](1, 2) + )) + val fields1 = Array( + BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, + DoubleType, StringType, DecimalType.defaultConcreteType, CalendarIntervalType, + BinaryType) + + assert(convertBackToInternalRow(inputRow, fields1) === inputRow) + + // Array tests + val arrayRow = InternalRow.fromSeq(Seq( + createArray(1, 2, 3), + createArray( + createArray(Seq("a", "b", "c").map(UTF8String.fromString): _*), + createArray(Seq("d").map(UTF8String.fromString): _*)) + )) + val fields2 = Array[DataType]( + ArrayType(IntegerType), + ArrayType(ArrayType(StringType))) + + assert(convertBackToInternalRow(arrayRow, fields2) === arrayRow) + + // Struct tests + val structRow = InternalRow.fromSeq(Seq( + InternalRow.fromSeq(Seq[Any](1, 4.0)), + InternalRow.fromSeq(Seq( + UTF8String.fromString("test"), + InternalRow.fromSeq(Seq( + 1, + createArray(Seq("2", "3").map(UTF8String.fromString): _*) + )) + )) + )) + val fields3 = Array[DataType]( + 
StructType( + StructField("c0", IntegerType) :: + StructField("c1", DoubleType) :: + Nil), + StructType( + StructField("c2", StringType) :: + StructField("c3", StructType( + StructField("c4", IntegerType) :: + StructField("c5", ArrayType(StringType)) :: + Nil)) :: + Nil)) + + assert(convertBackToInternalRow(structRow, fields3) === structRow) + + // Map tests + val mapRow = InternalRow.fromSeq(Seq( + createMap(Seq("k1", "k2").map(UTF8String.fromString): _*)(1, 2), + createMap( + createMap(3, 5)(Seq("v1", "v2").map(UTF8String.fromString): _*), + createMap(7, 9)(Seq("v3", "v4").map(UTF8String.fromString): _*) + )( + createMap(Seq("k3", "k4").map(UTF8String.fromString): _*)(3.toShort, 4.toShort), + createMap(Seq("k5", "k6").map(UTF8String.fromString): _*)(5.toShort, 6.toShort) + ))) + val fields4 = Array[DataType]( + MapType(StringType, IntegerType), + MapType(MapType(IntegerType, StringType), MapType(StringType, ShortType))) + + val mapResultRow = convertBackToInternalRow(mapRow, fields4) + val mapExpectedRow = mapRow + checkResult(mapExpectedRow, mapResultRow, + exprDataType = StructType(fields4.zipWithIndex.map(f => StructField(s"c${f._2}", f._1))), + exprNullable = false) + + // UDT tests + val vector = new TestUDT.MyDenseVector(Array(1.0, 3.0, 5.0, 7.0, 9.0)) + val udt = new TestUDT.MyDenseVectorUDT() + val udtRow = InternalRow.fromSeq(Seq(udt.serialize(vector))) + val fields5 = Array[DataType](udt) + assert(convertBackToInternalRow(udtRow, fields5) === udtRow) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala index 614f24db0aaf..b0f55b3b5c44 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala @@ -17,25 +17,24 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, JoinedRow} -import org.apache.spark.sql.catalyst.expressions.codegen.GenerateSafeProjection +import org.apache.spark.sql.catalyst.expressions.{Attribute, JoinedRow, SafeProjection} /** * Evaluator for a [[DeclarativeAggregate]]. 
*/ case class DeclarativeAggregateEvaluator(function: DeclarativeAggregate, input: Seq[Attribute]) { - lazy val initializer = GenerateSafeProjection.generate(function.initialValues) + lazy val initializer = SafeProjection.create(function.initialValues) - lazy val updater = GenerateSafeProjection.generate( + lazy val updater = SafeProjection.create( function.updateExpressions, function.aggBufferAttributes ++ input) - lazy val merger = GenerateSafeProjection.generate( + lazy val merger = SafeProjection.create( function.mergeExpressions, function.aggBufferAttributes ++ function.inputAggBufferAttributes) - lazy val evaluator = GenerateSafeProjection.generate( + lazy val evaluator = SafeProjection.create( function.evaluateExpression :: Nil, function.aggBufferAttributes) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 2c45b3b0c73d..4c9bcfe8f93a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -58,7 +58,7 @@ class GeneratedProjectionSuite extends SparkFunSuite { } // test generated SafeProjection - val safeProj = FromUnsafeProjection(nestedSchema) + val safeProj = SafeProjection.create(nestedSchema) val result = safeProj(unsafe) // Can't compare GenericInternalRow with JoinedRow directly (0 until N).foreach { i => @@ -109,7 +109,7 @@ class GeneratedProjectionSuite extends SparkFunSuite { } // test generated SafeProjection - val safeProj = FromUnsafeProjection(nestedSchema) + val safeProj = SafeProjection.create(nestedSchema) val result = safeProj(unsafe) // Can't compare GenericInternalRow with JoinedRow directly (0 until N).foreach { i => @@ -147,7 +147,7 @@ class GeneratedProjectionSuite extends SparkFunSuite { assert(unsafeRow.getArray(1).getBinary(1) === null) assert(java.util.Arrays.equals(unsafeRow.getArray(1).getBinary(2), Array[Byte](3, 4))) - val safeProj = FromUnsafeProjection(fields) + val safeProj = SafeProjection.create(fields) val row2 = safeProj(unsafeRow) assert(row2 === row) } @@ -233,7 +233,7 @@ class GeneratedProjectionSuite extends SparkFunSuite { val nestedSchema = StructType( Seq(StructField("", joinedSchema), StructField("", joinedSchema)) ++ joinedSchema) - val safeProj = FromUnsafeProjection(nestedSchema) + val safeProj = SafeProjection.create(nestedSchema) val result = safeProj(nested) // test generated MutableProjection diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala index 6400898343ae..da71e3a4d53e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala @@ -22,7 +22,7 @@ import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.RandomDataGenerator import org.apache.spark.sql.catalyst.encoders.{ExamplePointUDT, RowEncoder} -import org.apache.spark.sql.catalyst.expressions.{FromUnsafeProjection, UnsafeArrayData, UnsafeProjection} +import org.apache.spark.sql.catalyst.expressions.{SafeProjection, UnsafeArrayData, UnsafeProjection} import 
org.apache.spark.sql.types._ class ArrayDataIndexedSeqSuite extends SparkFunSuite { @@ -77,7 +77,7 @@ class ArrayDataIndexedSeqSuite extends SparkFunSuite { val internalRow = rowConverter.toRow(row) val unsafeRowConverter = UnsafeProjection.create(schema) - val safeRowConverter = FromUnsafeProjection(schema) + val safeRowConverter = SafeProjection.create(schema) val unsafeRow = unsafeRowConverter(internalRow) val safeRow = safeRowConverter(unsafeRow) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala new file mode 100644 index 000000000000..1be8ee9dfa92 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.types + +import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData} + + +// Wrapped in an object to check Scala compatibility. See SPARK-13929 +object TestUDT { + + @SQLUserDefinedType(udt = classOf[MyDenseVectorUDT]) + private[sql] class MyDenseVector(val data: Array[Double]) extends Serializable { + override def hashCode(): Int = java.util.Arrays.hashCode(data) + + override def equals(other: Any): Boolean = other match { + case v: MyDenseVector => java.util.Arrays.equals(this.data, v.data) + case _ => false + } + + override def toString: String = data.mkString("(", ", ", ")") + } + + private[sql] class MyDenseVectorUDT extends UserDefinedType[MyDenseVector] { + + override def sqlType: DataType = ArrayType(DoubleType, containsNull = false) + + override def serialize(features: MyDenseVector): ArrayData = { + new GenericArrayData(features.data.map(_.asInstanceOf[Any])) + } + + override def deserialize(datum: Any): MyDenseVector = { + datum match { + case data: ArrayData => + new MyDenseVector(data.toDoubleArray()) + } + } + + override def userClass: Class[MyDenseVector] = classOf[MyDenseVector] + + private[spark] override def asNullable: MyDenseVectorUDT = this + + override def hashCode(): Int = getClass.hashCode() + + override def equals(other: Any): Boolean = other.isInstanceOf[MyDenseVectorUDT] + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 64b42c32b8b1..54299e9808bf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -312,13 +312,13 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext with Befo assert(msg.contains("CSV data source does not support array data type")) msg = intercept[AnalysisException] { - Seq((1, new 
UDT.MyDenseVector(Array(0.25, 2.25, 4.25)))).toDF("id", "vectors") + Seq((1, new TestUDT.MyDenseVector(Array(0.25, 2.25, 4.25)))).toDF("id", "vectors") .write.mode("overwrite").csv(csvDir) }.getMessage assert(msg.contains("CSV data source does not support array data type")) msg = intercept[AnalysisException] { - val schema = StructType(StructField("a", new UDT.MyDenseVectorUDT(), true) :: Nil) + val schema = StructType(StructField("a", new TestUDT.MyDenseVectorUDT(), true) :: Nil) spark.range(1).write.mode("overwrite").csv(csvDir) spark.read.schema(schema).csv(csvDir).collect() }.getMessage diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala index cf956316057e..6628d36ffc70 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -20,56 +20,14 @@ package org.apache.spark.sql import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.{Cast, ExpressionEvalHelper, GenericInternalRow, Literal} -import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData} import org.apache.spark.sql.execution.datasources.parquet.ParquetTest import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ -private[sql] case class MyLabeledPoint(label: Double, features: UDT.MyDenseVector) { +private[sql] case class MyLabeledPoint(label: Double, features: TestUDT.MyDenseVector) { def getLabel: Double = label - def getFeatures: UDT.MyDenseVector = features -} - -// Wrapped in an object to check Scala compatibility. 
See SPARK-13929 -object UDT { - - @SQLUserDefinedType(udt = classOf[MyDenseVectorUDT]) - private[sql] class MyDenseVector(val data: Array[Double]) extends Serializable { - override def hashCode(): Int = java.util.Arrays.hashCode(data) - - override def equals(other: Any): Boolean = other match { - case v: MyDenseVector => java.util.Arrays.equals(this.data, v.data) - case _ => false - } - - override def toString: String = data.mkString("(", ", ", ")") - } - - private[sql] class MyDenseVectorUDT extends UserDefinedType[MyDenseVector] { - - override def sqlType: DataType = ArrayType(DoubleType, containsNull = false) - - override def serialize(features: MyDenseVector): ArrayData = { - new GenericArrayData(features.data.map(_.asInstanceOf[Any])) - } - - override def deserialize(datum: Any): MyDenseVector = { - datum match { - case data: ArrayData => - new MyDenseVector(data.toDoubleArray()) - } - } - - override def userClass: Class[MyDenseVector] = classOf[MyDenseVector] - - private[spark] override def asNullable: MyDenseVectorUDT = this - - override def hashCode(): Int = getClass.hashCode() - - override def equals(other: Any): Boolean = other.isInstanceOf[MyDenseVectorUDT] - } - + def getFeatures: TestUDT.MyDenseVector = features } // object and classes to test SPARK-19311 @@ -148,12 +106,12 @@ class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetT import testImplicits._ private lazy val pointsRDD = Seq( - MyLabeledPoint(1.0, new UDT.MyDenseVector(Array(0.1, 1.0))), - MyLabeledPoint(0.0, new UDT.MyDenseVector(Array(0.2, 2.0)))).toDF() + MyLabeledPoint(1.0, new TestUDT.MyDenseVector(Array(0.1, 1.0))), + MyLabeledPoint(0.0, new TestUDT.MyDenseVector(Array(0.2, 2.0)))).toDF() private lazy val pointsRDD2 = Seq( - MyLabeledPoint(1.0, new UDT.MyDenseVector(Array(0.1, 1.0))), - MyLabeledPoint(0.0, new UDT.MyDenseVector(Array(0.3, 3.0)))).toDF() + MyLabeledPoint(1.0, new TestUDT.MyDenseVector(Array(0.1, 1.0))), + MyLabeledPoint(0.0, new TestUDT.MyDenseVector(Array(0.3, 3.0)))).toDF() test("register user type: MyDenseVector for MyLabeledPoint") { val labels: RDD[Double] = pointsRDD.select('label).rdd.map { case Row(v: Double) => v } @@ -162,16 +120,17 @@ class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetT assert(labelsArrays.contains(1.0)) assert(labelsArrays.contains(0.0)) - val features: RDD[UDT.MyDenseVector] = - pointsRDD.select('features).rdd.map { case Row(v: UDT.MyDenseVector) => v } - val featuresArrays: Array[UDT.MyDenseVector] = features.collect() + val features: RDD[TestUDT.MyDenseVector] = + pointsRDD.select('features).rdd.map { case Row(v: TestUDT.MyDenseVector) => v } + val featuresArrays: Array[TestUDT.MyDenseVector] = features.collect() assert(featuresArrays.size === 2) - assert(featuresArrays.contains(new UDT.MyDenseVector(Array(0.1, 1.0)))) - assert(featuresArrays.contains(new UDT.MyDenseVector(Array(0.2, 2.0)))) + assert(featuresArrays.contains(new TestUDT.MyDenseVector(Array(0.1, 1.0)))) + assert(featuresArrays.contains(new TestUDT.MyDenseVector(Array(0.2, 2.0)))) } test("UDTs and UDFs") { - spark.udf.register("testType", (d: UDT.MyDenseVector) => d.isInstanceOf[UDT.MyDenseVector]) + spark.udf.register("testType", + (d: TestUDT.MyDenseVector) => d.isInstanceOf[TestUDT.MyDenseVector]) pointsRDD.createOrReplaceTempView("points") checkAnswer( sql("SELECT testType(features) from points"), @@ -185,8 +144,8 @@ class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetT checkAnswer( spark.read.parquet(path), Seq( - 
Row(1.0, new UDT.MyDenseVector(Array(0.1, 1.0))), - Row(0.0, new UDT.MyDenseVector(Array(0.2, 2.0))))) + Row(1.0, new TestUDT.MyDenseVector(Array(0.1, 1.0))), + Row(0.0, new TestUDT.MyDenseVector(Array(0.2, 2.0))))) } } @@ -197,17 +156,17 @@ class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetT checkAnswer( spark.read.parquet(path), Seq( - Row(1.0, new UDT.MyDenseVector(Array(0.1, 1.0))), - Row(0.0, new UDT.MyDenseVector(Array(0.2, 2.0))))) + Row(1.0, new TestUDT.MyDenseVector(Array(0.1, 1.0))), + Row(0.0, new TestUDT.MyDenseVector(Array(0.2, 2.0))))) } } // Tests to make sure that all operators correctly convert types on the way out. test("Local UDTs") { - val vec = new UDT.MyDenseVector(Array(0.1, 1.0)) + val vec = new TestUDT.MyDenseVector(Array(0.1, 1.0)) val df = Seq((1, vec)).toDF("int", "vec") - assert(vec === df.collect()(0).getAs[UDT.MyDenseVector](1)) - assert(vec === df.take(1)(0).getAs[UDT.MyDenseVector](1)) + assert(vec === df.collect()(0).getAs[TestUDT.MyDenseVector](1)) + assert(vec === df.take(1)(0).getAs[TestUDT.MyDenseVector](1)) checkAnswer(df.limit(1).groupBy('int).agg(first('vec)), Row(1, vec)) checkAnswer(df.orderBy('int).limit(1).groupBy('int).agg(first('vec)), Row(1, vec)) } @@ -219,14 +178,14 @@ class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetT ) val schema = StructType(Seq( StructField("id", IntegerType, false), - StructField("vec", new UDT.MyDenseVectorUDT, false) + StructField("vec", new TestUDT.MyDenseVectorUDT, false) )) val jsonRDD = spark.read.schema(schema).json(data.toDS()) checkAnswer( jsonRDD, - Row(1, new UDT.MyDenseVector(Array(1.1, 2.2, 3.3, 4.4))) :: - Row(2, new UDT.MyDenseVector(Array(2.25, 4.5, 8.75))) :: + Row(1, new TestUDT.MyDenseVector(Array(1.1, 2.2, 3.3, 4.4))) :: + Row(2, new TestUDT.MyDenseVector(Array(2.25, 4.5, 8.75))) :: Nil ) } @@ -239,25 +198,25 @@ class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetT val schema = StructType(Seq( StructField("id", IntegerType, false), - StructField("vec", new UDT.MyDenseVectorUDT, false) + StructField("vec", new TestUDT.MyDenseVectorUDT, false) )) val jsonDataset = spark.read.schema(schema).json(data.toDS()) - .as[(Int, UDT.MyDenseVector)] + .as[(Int, TestUDT.MyDenseVector)] checkDataset( jsonDataset, - (1, new UDT.MyDenseVector(Array(1.1, 2.2, 3.3, 4.4))), - (2, new UDT.MyDenseVector(Array(2.25, 4.5, 8.75))) + (1, new TestUDT.MyDenseVector(Array(1.1, 2.2, 3.3, 4.4))), + (2, new TestUDT.MyDenseVector(Array(2.25, 4.5, 8.75))) ) } test("SPARK-10472 UserDefinedType.typeName") { assert(IntegerType.typeName === "integer") - assert(new UDT.MyDenseVectorUDT().typeName === "mydensevector") + assert(new TestUDT.MyDenseVectorUDT().typeName === "mydensevector") } test("Catalyst type converter null handling for UDTs") { - val udt = new UDT.MyDenseVectorUDT() + val udt = new TestUDT.MyDenseVectorUDT() val toScalaConverter = CatalystTypeConverters.createToScalaConverter(udt) assert(toScalaConverter(null) === null) @@ -303,12 +262,12 @@ class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetT test("except on UDT") { checkAnswer( pointsRDD.except(pointsRDD2), - Seq(Row(0.0, new UDT.MyDenseVector(Array(0.2, 2.0))))) + Seq(Row(0.0, new TestUDT.MyDenseVector(Array(0.2, 2.0))))) } test("SPARK-23054 Cast UserDefinedType to string") { - val udt = new UDT.MyDenseVectorUDT() - val vector = new UDT.MyDenseVector(Array(1.0, 3.0, 5.0, 7.0, 9.0)) + val udt = new TestUDT.MyDenseVectorUDT() + val vector = new 
TestUDT.MyDenseVector(Array(1.0, 3.0, 5.0, 7.0, 9.0)) val data = udt.serialize(vector) val ret = Cast(Literal(data, udt), StringType, None) checkEvaluation(ret, "(1.0, 3.0, 5.0, 7.0, 9.0)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 9d23161c1f24..dff37ca2d40f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1463,7 +1463,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { FloatType, DoubleType, DecimalType(25, 5), DecimalType(6, 5), DateType, TimestampType, ArrayType(IntegerType), MapType(StringType, LongType), struct, - new UDT.MyDenseVectorUDT()) + new TestUDT.MyDenseVectorUDT()) val fields = dataTypes.zipWithIndex.map { case (dataType, index) => StructField(s"col$index", dataType, nullable = true) } @@ -1487,7 +1487,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { Seq(2, 3, 4), Map("a string" -> 2000L), Row(4.75.toFloat, Seq(false, true)), - new UDT.MyDenseVector(Array(0.25, 2.25, 4.25))) + new TestUDT.MyDenseVector(Array(0.25, 2.25, 4.25))) val data = Row.fromSeq(Seq("Spark " + spark.sparkContext.version) ++ constantValues) :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index 998b7b31dcd6..918dbcdfa1cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, RecordReaderIterator} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.sql.types.{IntegerType, StructType, TestUDT} import org.apache.spark.util.Utils case class AllDataTypesWithNonPrimitiveType( @@ -103,7 +103,7 @@ abstract class OrcQueryTest extends OrcTest { test("Read/write UserDefinedType") { withTempPath { path => - val data = Seq((1, new UDT.MyDenseVector(Array(0.25, 2.25, 4.25)))) + val data = Seq((1, new TestUDT.MyDenseVector(Array(0.25, 2.25, 4.25)))) val udtDF = data.toDF("id", "vectors") udtDF.write.orc(path.getAbsolutePath) val readBack = spark.read.schema(udtDF.schema).orc(path.getAbsolutePath) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala index c65bf7c14c7a..cfae2d82e273 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala @@ -884,7 +884,7 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te FloatType, DoubleType, DecimalType(25, 5), DecimalType(6, 5), DateType, TimestampType, ArrayType(IntegerType), MapType(StringType, LongType), struct, - new UDT.MyDenseVectorUDT()) + new TestUDT.MyDenseVectorUDT()) // Right now, we will use SortAggregate to handle UDAFs. 
// UnsafeRow.mutableFieldTypes.asScala.toSeq will trigger SortAggregate to use // UnsafeRow as the aggregation buffer. While, dataTypes will trigger diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala index c9309197791b..2391106cfb25 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala @@ -124,7 +124,7 @@ class ObjectHashAggregateSuite .add("f2", ArrayType(BooleanType), nullable = true), // UDT - new UDT.MyDenseVectorUDT(), + new TestUDT.MyDenseVectorUDT(), // Others StringType, @@ -259,7 +259,7 @@ class ObjectHashAggregateSuite StringType, BinaryType, NullType, BooleanType ) - val udt = new UDT.MyDenseVectorUDT() + val udt = new TestUDT.MyDenseVectorUDT() val fixedLengthTypes = builtinNumericTypes ++ Seq(BooleanType, NullType) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala index 6bd59fde550d..6075f2c8877d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala @@ -115,7 +115,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes new StructType() .add("f1", FloatType, nullable = true) .add("f2", ArrayType(BooleanType, containsNull = true), nullable = true), - new UDT.MyDenseVectorUDT() + new TestUDT.MyDenseVectorUDT() ).filter(supportsDataType) test(s"test all data types") { From 93f5592aa8c1254a93524fda81cf0e418c22cb2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E7=81=BF00244106?= <00244106@zte.intra> Date: Tue, 4 Dec 2018 22:08:16 +0900 Subject: [PATCH 0066/1072] [MINOR][SQL] Combine the same codes in test cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? In the DDLSuit, there are four test cases have the same codes , writing a function can combine the same code. ## How was this patch tested? existing tests. Closes #23194 from CarolinePeng/Update_temp. 
Authored-by: 彭灿00244106 <00244106@zte.intra> Signed-off-by: Takeshi Yamamuro --- .../sql/execution/command/DDLSuite.scala | 40 ++++++++----------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 9d32fb6d4696..052a5e757c44 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -377,41 +377,41 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - test("CTAS a managed table with the existing empty directory") { - val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab1"))) + private def withEmptyDirInTablePath(dirName: String)(f : File => Unit): Unit = { + val tableLoc = + new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier(dirName))) try { tableLoc.mkdir() + f(tableLoc) + } finally { + waitForTasksToFinish() + Utils.deleteRecursively(tableLoc) + } + } + + + test("CTAS a managed table with the existing empty directory") { + withEmptyDirInTablePath("tab1") { tableLoc => withTable("tab1") { sql(s"CREATE TABLE tab1 USING ${dataSource} AS SELECT 1, 'a'") checkAnswer(spark.table("tab1"), Row(1, "a")) } - } finally { - waitForTasksToFinish() - Utils.deleteRecursively(tableLoc) } } test("create a managed table with the existing empty directory") { - val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab1"))) - try { - tableLoc.mkdir() + withEmptyDirInTablePath("tab1") { tableLoc => withTable("tab1") { sql(s"CREATE TABLE tab1 (col1 int, col2 string) USING ${dataSource}") sql("INSERT INTO tab1 VALUES (1, 'a')") checkAnswer(spark.table("tab1"), Row(1, "a")) } - } finally { - waitForTasksToFinish() - Utils.deleteRecursively(tableLoc) } } test("create a managed table with the existing non-empty directory") { withTable("tab1") { - val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab1"))) - try { - // create an empty hidden file - tableLoc.mkdir() + withEmptyDirInTablePath("tab1") { tableLoc => val hiddenGarbageFile = new File(tableLoc.getCanonicalPath, ".garbage") hiddenGarbageFile.createNewFile() val exMsg = "Can not create the managed table('`tab1`'). The associated location" @@ -439,28 +439,20 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { }.getMessage assert(ex.contains(exMsgWithDefaultDB)) } - } finally { - waitForTasksToFinish() - Utils.deleteRecursively(tableLoc) } } } test("rename a managed table with existing empty directory") { - val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab2"))) - try { + withEmptyDirInTablePath("tab2") { tableLoc => withTable("tab1") { sql(s"CREATE TABLE tab1 USING $dataSource AS SELECT 1, 'a'") - tableLoc.mkdir() val ex = intercept[AnalysisException] { sql("ALTER TABLE tab1 RENAME TO tab2") }.getMessage val expectedMsg = "Can not rename the managed table('`tab1`'). The associated location" assert(ex.contains(expectedMsg)) } - } finally { - waitForTasksToFinish() - Utils.deleteRecursively(tableLoc) } } From 06a3b6aafa510ede2f1376b29a46f99447286c67 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 4 Dec 2018 07:57:58 -0600 Subject: [PATCH 0067/1072] [SPARK-24423][FOLLOW-UP][SQL] Fix error example ## What changes were proposed in this pull request? 
![image](https://user-images.githubusercontent.com/5399861/49172173-42ad9800-f37b-11e8-8135-7adc323357ae.png) It will throw: ``` requirement failed: When reading JDBC data sources, users need to specify all or none for the following options: 'partitionColumn', 'lowerBound', 'upperBound', and 'numPartitions' ``` and ``` User-defined partition column subq.c1 not found in the JDBC relation ... ``` This PR fix this error example. ## How was this patch tested? manual tests Closes #23170 from wangyum/SPARK-24499. Authored-by: Yuming Wang Signed-off-by: Sean Owen --- docs/sql-data-sources-jdbc.md | 6 +++--- .../sql/execution/datasources/jdbc/JDBCOptions.scala | 10 +++++++--- .../scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 10 +++++++--- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/docs/sql-data-sources-jdbc.md b/docs/sql-data-sources-jdbc.md index 9a5d0fc7d424..a2b14620be12 100644 --- a/docs/sql-data-sources-jdbc.md +++ b/docs/sql-data-sources-jdbc.md @@ -64,9 +64,9 @@ the following case-insensitive options: Example:
    spark.read.format("jdbc")
    -    .option("dbtable", "(select c1, c2 from t1) as subq")
    -    .option("partitionColumn", "subq.c1"
    -    .load()
    + .option("url", jdbcUrl)&lt;br&gt;
    + .option("query", "select c1, c2 from t1")
    + .load()
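(Editorial aside, not part of the patch.) The corrected example above pairs the `query` option with `url` and drops partitioning entirely. A hedged sketch of the partitioned-read form that the quoted requirement asks for (all of `partitionColumn`, `lowerBound`, `upperBound` and `numPartitions` supplied together) is shown below; it mirrors the example this PR also adds to the `JDBCOptions` error message, with `jdbcUrl` as a placeholder connection string.

```scala
// Illustrative sketch only, not part of the patch: the partitioned-read variant in which
// all four partitioning options are supplied together. `jdbcUrl` is an assumed placeholder.
val jdbcUrl = "jdbc:postgresql://dbhost:5432/mydb"   // placeholder connection string
val partitionedDF = spark.read.format("jdbc")
  .option("url", jdbcUrl)
  .option("dbtable", "(select c1, c2 from t1) as subq")
  .option("partitionColumn", "c1")   // referenced without the subquery alias
  .option("lowerBound", "1")         // bounds only decide the partition stride,
  .option("upperBound", "100")       // they do not filter rows
  .option("numPartitions", "3")
  .load()
```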
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala index 7dfbb9d8b5c0..b4469cb538fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala @@ -137,9 +137,13 @@ class JDBCOptions( |the partition columns using the supplied subquery alias to resolve any ambiguity. |Example : |spark.read.format("jdbc") - | .option("dbtable", "(select c1, c2 from t1) as subq") - | .option("partitionColumn", "subq.c1" - | .load() + | .option("url", jdbcUrl) + | .option("dbtable", "(select c1, c2 from t1) as subq") + | .option("partitionColumn", "c1") + | .option("lowerBound", "1") + | .option("upperBound", "100") + | .option("numPartitions", "3") + | .load() """.stripMargin ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 7fa0e7fc162c..71e83767964a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -1348,9 +1348,13 @@ class JDBCSuite extends QueryTest |the partition columns using the supplied subquery alias to resolve any ambiguity. |Example : |spark.read.format("jdbc") - | .option("dbtable", "(select c1, c2 from t1) as subq") - | .option("partitionColumn", "subq.c1" - | .load() + | .option("url", jdbcUrl) + | .option("dbtable", "(select c1, c2 from t1) as subq") + | .option("partitionColumn", "c1") + | .option("lowerBound", "1") + | .option("upperBound", "100") + | .option("numPartitions", "3") + | .load() """.stripMargin val e5 = intercept[RuntimeException] { sql( From f982ca07e80074bdc1e3b742c5e21cf368e4ede2 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 4 Dec 2018 08:36:33 -0600 Subject: [PATCH 0068/1072] [SPARK-26178][SQL] Use java.time API for parsing timestamps and dates from CSV ## What changes were proposed in this pull request? In the PR, I propose to use **java.time API** for parsing timestamps and dates from CSV content with microseconds precision. The SQL config `spark.sql.legacy.timeParser.enabled` allow to switch back to previous behaviour with using `java.text.SimpleDateFormat`/`FastDateFormat` for parsing/generating timestamps/dates. ## How was this patch tested? It was tested by `UnivocityParserSuite`, `CsvExpressionsSuite`, `CsvFunctionsSuite` and `CsvSuite`. Closes #23150 from MaxGekk/time-parser. 
Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Sean Owen --- docs/sql-migration-guide-upgrade.md | 2 + .../sql/catalyst/csv/CSVInferSchema.scala | 15 +- .../spark/sql/catalyst/csv/CSVOptions.scala | 10 +- .../sql/catalyst/csv/UnivocityGenerator.scala | 14 +- .../sql/catalyst/csv/UnivocityParser.scala | 38 ++-- .../sql/catalyst/util/DateTimeFormatter.scala | 179 ++++++++++++++++++ .../sql/catalyst/util/DateTimeUtils.scala | 2 +- .../apache/spark/sql/internal/SQLConf.scala | 9 + .../catalyst/csv/CSVInferSchemaSuite.scala | 7 +- .../catalyst/csv/UnivocityParserSuite.scala | 113 ++++++----- .../sql/catalyst/util/DateTimeTestUtils.scala | 5 +- .../sql/util/DateTimeFormatterSuite.scala | 103 ++++++++++ .../apache/spark/sql/CsvFunctionsSuite.scala | 2 +- .../execution/datasources/csv/CSVSuite.scala | 66 ++++--- 14 files changed, 431 insertions(+), 134 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatter.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimeFormatterSuite.scala diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 787f4bcbbea8..fee0e6df7177 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -33,6 +33,8 @@ displayTitle: Spark SQL Upgrading Guide - Spark applications which are built with Spark version 2.4 and prior, and call methods of `UserDefinedFunction`, need to be re-compiled with Spark 3.0, as they are not binary compatible with Spark 3.0. + - Since Spark 3.0, CSV datasource uses java.time API for parsing and generating CSV content. New formatting implementation supports date/timestamp patterns conformed to ISO 8601. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. + ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some change in behavior and are illustrated in the table below. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 94cb4b114e6b..345dc4d41993 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -22,10 +22,16 @@ import scala.util.control.Exception.allCatch import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.analysis.TypeCoercion import org.apache.spark.sql.catalyst.expressions.ExprUtils -import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.DateTimeFormatter import org.apache.spark.sql.types._ -class CSVInferSchema(options: CSVOptions) extends Serializable { +class CSVInferSchema(val options: CSVOptions) extends Serializable { + + @transient + private lazy val timeParser = DateTimeFormatter( + options.timestampFormat, + options.timeZone, + options.locale) private val decimalParser = { ExprUtils.getDecimalParser(options.locale) @@ -154,10 +160,7 @@ class CSVInferSchema(options: CSVOptions) extends Serializable { private def tryParseTimestamp(field: String): DataType = { // This case infers a custom `dataFormat` is set. - if ((allCatch opt options.timestampFormat.parse(field)).isDefined) { - TimestampType - } else if ((allCatch opt DateTimeUtils.stringToTime(field)).isDefined) { - // We keep this for backwards compatibility. + if ((allCatch opt timeParser.parse(field)).isDefined) { TimestampType } else { tryParseBoolean(field) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala index 94bdb72d675d..90c96d1f55c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala @@ -21,7 +21,6 @@ import java.nio.charset.StandardCharsets import java.util.{Locale, TimeZone} import com.univocity.parsers.csv.{CsvParserSettings, CsvWriterSettings, UnescapedQuoteHandling} -import org.apache.commons.lang3.time.FastDateFormat import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.util._ @@ -146,13 +145,10 @@ class CSVOptions( // A language tag in IETF BCP 47 format val locale: Locale = parameters.get("locale").map(Locale.forLanguageTag).getOrElse(Locale.US) - // Uses `FastDateFormat` which can be direct replacement for `SimpleDateFormat` and thread-safe. 
- val dateFormat: FastDateFormat = - FastDateFormat.getInstance(parameters.getOrElse("dateFormat", "yyyy-MM-dd"), locale) + val dateFormat: String = parameters.getOrElse("dateFormat", "yyyy-MM-dd") - val timestampFormat: FastDateFormat = - FastDateFormat.getInstance( - parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), timeZone, locale) + val timestampFormat: String = + parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSXXX") val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala index 2ab376c0ac20..af09cd6c8449 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala @@ -22,7 +22,7 @@ import java.io.Writer import com.univocity.parsers.csv.CsvWriter import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeFormatter} import org.apache.spark.sql.types._ class UnivocityGenerator( @@ -41,14 +41,18 @@ class UnivocityGenerator( private val valueConverters: Array[ValueConverter] = schema.map(_.dataType).map(makeConverter).toArray + private val timeFormatter = DateTimeFormatter( + options.timestampFormat, + options.timeZone, + options.locale) + private val dateFormatter = DateFormatter(options.dateFormat, options.timeZone, options.locale) + private def makeConverter(dataType: DataType): ValueConverter = dataType match { case DateType => - (row: InternalRow, ordinal: Int) => - options.dateFormat.format(DateTimeUtils.toJavaDate(row.getInt(ordinal))) + (row: InternalRow, ordinal: Int) => dateFormatter.format(row.getInt(ordinal)) case TimestampType => - (row: InternalRow, ordinal: Int) => - options.timestampFormat.format(DateTimeUtils.toJavaTimestamp(row.getLong(ordinal))) + (row: InternalRow, ordinal: Int) => timeFormatter.format(row.getLong(ordinal)) case udt: UserDefinedType[_] => makeConverter(udt.sqlType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index 8fff4b0781b1..0f375e036029 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.csv import java.io.InputStream -import scala.util.Try import scala.util.control.NonFatal import com.univocity.parsers.csv.CsvParser @@ -27,7 +26,7 @@ import com.univocity.parsers.csv.CsvParser import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericInternalRow} -import org.apache.spark.sql.catalyst.util.{BadRecordException, DateTimeUtils, FailureSafeParser} +import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -75,6 +74,12 @@ class UnivocityParser( private val row = new GenericInternalRow(requiredSchema.length) + private val timeFormatter = DateTimeFormatter( + options.timestampFormat, + options.timeZone, + options.locale) + private val dateFormatter = 
DateFormatter(options.dateFormat, options.timeZone, options.locale) + // Retrieve the raw record string. private def getCurrentInput: UTF8String = { UTF8String.fromString(tokenizer.getContext.currentParsedContent().stripLineEnd) @@ -100,7 +105,7 @@ class UnivocityParser( // // output row - ["A", 2] private val valueConverters: Array[ValueConverter] = { - requiredSchema.map(f => makeConverter(f.name, f.dataType, f.nullable, options)).toArray + requiredSchema.map(f => makeConverter(f.name, f.dataType, f.nullable)).toArray } private val decimalParser = ExprUtils.getDecimalParser(options.locale) @@ -115,8 +120,7 @@ class UnivocityParser( def makeConverter( name: String, dataType: DataType, - nullable: Boolean = true, - options: CSVOptions): ValueConverter = dataType match { + nullable: Boolean = true): ValueConverter = dataType match { case _: ByteType => (d: String) => nullSafeDatum(d, name, nullable, options)(_.toByte) @@ -154,34 +158,16 @@ class UnivocityParser( } case _: TimestampType => (d: String) => - nullSafeDatum(d, name, nullable, options) { datum => - // This one will lose microseconds parts. - // See https://issues.apache.org/jira/browse/SPARK-10681. - Try(options.timestampFormat.parse(datum).getTime * 1000L) - .getOrElse { - // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards - // compatibility. - DateTimeUtils.stringToTime(datum).getTime * 1000L - } - } + nullSafeDatum(d, name, nullable, options)(timeFormatter.parse) case _: DateType => (d: String) => - nullSafeDatum(d, name, nullable, options) { datum => - // This one will lose microseconds parts. - // See https://issues.apache.org/jira/browse/SPARK-10681.x - Try(DateTimeUtils.millisToDays(options.dateFormat.parse(datum).getTime)) - .getOrElse { - // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards - // compatibility. - DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(datum).getTime) - } - } + nullSafeDatum(d, name, nullable, options)(dateFormatter.parse) case _: StringType => (d: String) => nullSafeDatum(d, name, nullable, options)(UTF8String.fromString) case udt: UserDefinedType[_] => (datum: String) => - makeConverter(name, udt.sqlType, nullable, options) + makeConverter(name, udt.sqlType, nullable) // We don't actually hit this exception though, we keep it for understandability case _ => throw new RuntimeException(s"Unsupported type: ${dataType.typeName}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatter.scala new file mode 100644 index 000000000000..ad1f4131de2f --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatter.scala @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import java.time._ +import java.time.format.DateTimeFormatterBuilder +import java.time.temporal.{ChronoField, TemporalQueries} +import java.util.{Locale, TimeZone} + +import scala.util.Try + +import org.apache.commons.lang3.time.FastDateFormat + +import org.apache.spark.sql.internal.SQLConf + +sealed trait DateTimeFormatter { + def parse(s: String): Long // returns microseconds since epoch + def format(us: Long): String +} + +class Iso8601DateTimeFormatter( + pattern: String, + timeZone: TimeZone, + locale: Locale) extends DateTimeFormatter { + val formatter = new DateTimeFormatterBuilder() + .appendPattern(pattern) + .parseDefaulting(ChronoField.YEAR_OF_ERA, 1970) + .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) + .parseDefaulting(ChronoField.DAY_OF_MONTH, 1) + .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) + .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) + .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) + .toFormatter(locale) + + def toInstant(s: String): Instant = { + val temporalAccessor = formatter.parse(s) + if (temporalAccessor.query(TemporalQueries.offset()) == null) { + val localDateTime = LocalDateTime.from(temporalAccessor) + val zonedDateTime = ZonedDateTime.of(localDateTime, timeZone.toZoneId) + Instant.from(zonedDateTime) + } else { + Instant.from(temporalAccessor) + } + } + + private def instantToMicros(instant: Instant): Long = { + val sec = Math.multiplyExact(instant.getEpochSecond, DateTimeUtils.MICROS_PER_SECOND) + val result = Math.addExact(sec, instant.getNano / DateTimeUtils.NANOS_PER_MICROS) + result + } + + def parse(s: String): Long = instantToMicros(toInstant(s)) + + def format(us: Long): String = { + val secs = Math.floorDiv(us, DateTimeUtils.MICROS_PER_SECOND) + val mos = Math.floorMod(us, DateTimeUtils.MICROS_PER_SECOND) + val instant = Instant.ofEpochSecond(secs, mos * DateTimeUtils.NANOS_PER_MICROS) + + formatter.withZone(timeZone.toZoneId).format(instant) + } +} + +class LegacyDateTimeFormatter( + pattern: String, + timeZone: TimeZone, + locale: Locale) extends DateTimeFormatter { + val format = FastDateFormat.getInstance(pattern, timeZone, locale) + + protected def toMillis(s: String): Long = format.parse(s).getTime + + def parse(s: String): Long = toMillis(s) * DateTimeUtils.MICROS_PER_MILLIS + + def format(us: Long): String = { + format.format(DateTimeUtils.toJavaTimestamp(us)) + } +} + +class LegacyFallbackDateTimeFormatter( + pattern: String, + timeZone: TimeZone, + locale: Locale) extends LegacyDateTimeFormatter(pattern, timeZone, locale) { + override def toMillis(s: String): Long = { + Try {super.toMillis(s)}.getOrElse(DateTimeUtils.stringToTime(s).getTime) + } +} + +object DateTimeFormatter { + def apply(format: String, timeZone: TimeZone, locale: Locale): DateTimeFormatter = { + if (SQLConf.get.legacyTimeParserEnabled) { + new LegacyFallbackDateTimeFormatter(format, timeZone, locale) + } else { + new Iso8601DateTimeFormatter(format, timeZone, locale) + } + } +} + +sealed trait DateFormatter { + def parse(s: String): Int // returns days since epoch + def format(days: Int): String +} + +class Iso8601DateFormatter( + pattern: String, + timeZone: TimeZone, + locale: Locale) extends DateFormatter { + + val dateTimeFormatter = new Iso8601DateTimeFormatter(pattern, timeZone, locale) + + override def parse(s: String): Int = { + val seconds = dateTimeFormatter.toInstant(s).getEpochSecond + val days 
= Math.floorDiv(seconds, DateTimeUtils.SECONDS_PER_DAY) + + days.toInt + } + + override def format(days: Int): String = { + val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY) + dateTimeFormatter.formatter.withZone(timeZone.toZoneId).format(instant) + } +} + +class LegacyDateFormatter( + pattern: String, + timeZone: TimeZone, + locale: Locale) extends DateFormatter { + val format = FastDateFormat.getInstance(pattern, timeZone, locale) + + def parse(s: String): Int = { + val milliseconds = format.parse(s).getTime + DateTimeUtils.millisToDays(milliseconds) + } + + def format(days: Int): String = { + val date = DateTimeUtils.toJavaDate(days) + format.format(date) + } +} + +class LegacyFallbackDateFormatter( + pattern: String, + timeZone: TimeZone, + locale: Locale) extends LegacyDateFormatter(pattern, timeZone, locale) { + override def parse(s: String): Int = { + Try(super.parse(s)).orElse { + // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards + // compatibility. + Try(DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(s).getTime)) + }.getOrElse { + // In Spark 1.5.0, we store the data as number of days since epoch in string. + // So, we just convert it to Int. + s.toInt + } + } +} + +object DateFormatter { + def apply(format: String, timeZone: TimeZone, locale: Locale): DateFormatter = { + if (SQLConf.get.legacyTimeParserEnabled) { + new LegacyFallbackDateFormatter(format, timeZone, locale) + } else { + new Iso8601DateFormatter(format, timeZone, locale) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 5ae75dc93930..c6dfdbf2505b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -50,7 +50,7 @@ object DateTimeUtils { final val MILLIS_PER_SECOND = 1000L final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L final val MICROS_PER_DAY = MICROS_PER_SECOND * SECONDS_PER_DAY - + final val NANOS_PER_MICROS = 1000L final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L // number of days in 400 years diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index c4f00d723c25..451b051f8407 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1618,6 +1618,13 @@ object SQLConf { "a SparkConf entry.") .booleanConf .createWithDefault(true) + + val LEGACY_TIME_PARSER_ENABLED = buildConf("spark.sql.legacy.timeParser.enabled") + .doc("When set to true, java.text.SimpleDateFormat is used for formatting and parsing " + + " dates/timestamps in a locale-sensitive manner. When set to false, classes from " + + "java.time.* packages are used for the same purpose.") + .booleanConf + .createWithDefault(false) } /** @@ -2040,6 +2047,8 @@ class SQLConf extends Serializable with Logging { def setCommandRejectsSparkConfs: Boolean = getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CONFS) + def legacyTimeParserEnabled: Boolean = getConf(SQLConf.LEGACY_TIME_PARSER_ENABLED) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. 
*/ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala index 1a020e67a75b..c2b525ad1a9f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala @@ -22,13 +22,12 @@ import java.util.Locale import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.plans.SQLHelper -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper { +class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper { test("String fields types are inferred correctly from null types") { - val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val options = new CSVOptions(Map("timestampFormat" -> "yyyy-MM-dd HH:mm:ss"), false, "GMT") val inferSchema = new CSVInferSchema(options) assert(inferSchema.inferField(NullType, "") == NullType) @@ -48,7 +47,7 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper { } test("String fields types are inferred correctly from other types") { - val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val options = new CSVOptions(Map("timestampFormat" -> "yyyy-MM-dd HH:mm:ss"), false, "GMT") val inferSchema = new CSVInferSchema(options) assert(inferSchema.inferField(LongType, "1.0") == DoubleType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala index 7212402ef5cf..2d0b0d3033a9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala @@ -19,20 +19,17 @@ package org.apache.spark.sql.catalyst.csv import java.math.BigDecimal import java.text.{DecimalFormat, DecimalFormatSymbols} -import java.util.Locale +import java.util.{Locale, TimeZone} + +import org.apache.commons.lang3.time.FastDateFormat import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String class UnivocityParserSuite extends SparkFunSuite with SQLHelper { - private val parser = new UnivocityParser( - StructType(Seq.empty), - new CSVOptions(Map.empty[String, String], false, "GMT")) - private def assertNull(v: Any) = assert(v == null) test("Can parse decimal type values") { @@ -43,7 +40,8 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { stringValues.zip(decimalValues).foreach { case (strVal, decimalVal) => val decimalValue = new BigDecimal(decimalVal.toString) val options = new CSVOptions(Map.empty[String, String], false, "GMT") - assert(parser.makeConverter("_1", decimalType, options = options).apply(strVal) === + val parser = new UnivocityParser(StructType(Seq.empty), options) + assert(parser.makeConverter("_1", decimalType).apply(strVal) === Decimal(decimalValue, decimalType.precision, decimalType.scale)) } } @@ -56,22 +54,23 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { types.foreach { t => // Tests that a custom nullValue. 
val nullValueOptions = new CSVOptions(Map("nullValue" -> "-"), false, "GMT") - val converter = - parser.makeConverter("_1", t, nullable = true, options = nullValueOptions) + var parser = new UnivocityParser(StructType(Seq.empty), nullValueOptions) + val converter = parser.makeConverter("_1", t, nullable = true) assertNull(converter.apply("-")) assertNull(converter.apply(null)) // Tests that the default nullValue is empty string. val options = new CSVOptions(Map.empty[String, String], false, "GMT") - assertNull(parser.makeConverter("_1", t, nullable = true, options = options).apply("")) + parser = new UnivocityParser(StructType(Seq.empty), options) + assertNull(parser.makeConverter("_1", t, nullable = true).apply("")) } // Not nullable field with nullValue option. types.foreach { t => // Casts a null to not nullable field should throw an exception. val options = new CSVOptions(Map("nullValue" -> "-"), false, "GMT") - val converter = - parser.makeConverter("_1", t, nullable = false, options = options) + val parser = new UnivocityParser(StructType(Seq.empty), options) + val converter = parser.makeConverter("_1", t, nullable = false) var message = intercept[RuntimeException] { converter.apply("-") }.getMessage @@ -86,62 +85,74 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { // null. Seq(true, false).foreach { b => val options = new CSVOptions(Map("nullValue" -> "null"), false, "GMT") - val converter = - parser.makeConverter("_1", StringType, nullable = b, options = options) + val parser = new UnivocityParser(StructType(Seq.empty), options) + val converter = parser.makeConverter("_1", StringType, nullable = b) assert(converter.apply("") == UTF8String.fromString("")) } } test("Throws exception for empty string with non null type") { - val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val parser = new UnivocityParser(StructType(Seq.empty), options) val exception = intercept[RuntimeException]{ - parser.makeConverter("_1", IntegerType, nullable = false, options = options).apply("") + parser.makeConverter("_1", IntegerType, nullable = false).apply("") } assert(exception.getMessage.contains("null value found but field _1 is not nullable.")) } test("Types are cast correctly") { val options = new CSVOptions(Map.empty[String, String], false, "GMT") - assert(parser.makeConverter("_1", ByteType, options = options).apply("10") == 10) - assert(parser.makeConverter("_1", ShortType, options = options).apply("10") == 10) - assert(parser.makeConverter("_1", IntegerType, options = options).apply("10") == 10) - assert(parser.makeConverter("_1", LongType, options = options).apply("10") == 10) - assert(parser.makeConverter("_1", FloatType, options = options).apply("1.00") == 1.0) - assert(parser.makeConverter("_1", DoubleType, options = options).apply("1.00") == 1.0) - assert(parser.makeConverter("_1", BooleanType, options = options).apply("true") == true) - - val timestampsOptions = + var parser = new UnivocityParser(StructType(Seq.empty), options) + assert(parser.makeConverter("_1", ByteType).apply("10") == 10) + assert(parser.makeConverter("_1", ShortType).apply("10") == 10) + assert(parser.makeConverter("_1", IntegerType).apply("10") == 10) + assert(parser.makeConverter("_1", LongType).apply("10") == 10) + assert(parser.makeConverter("_1", FloatType).apply("1.00") == 1.0) + assert(parser.makeConverter("_1", DoubleType).apply("1.00") == 1.0) + assert(parser.makeConverter("_1", 
BooleanType).apply("true") == true) + + var timestampsOptions = new CSVOptions(Map("timestampFormat" -> "dd/MM/yyyy hh:mm"), false, "GMT") + parser = new UnivocityParser(StructType(Seq.empty), timestampsOptions) val customTimestamp = "31/01/2015 00:00" - val expectedTime = timestampsOptions.timestampFormat.parse(customTimestamp).getTime - val castedTimestamp = - parser.makeConverter("_1", TimestampType, nullable = true, options = timestampsOptions) + var format = FastDateFormat.getInstance( + timestampsOptions.timestampFormat, timestampsOptions.timeZone, timestampsOptions.locale) + val expectedTime = format.parse(customTimestamp).getTime + val castedTimestamp = parser.makeConverter("_1", TimestampType, nullable = true) .apply(customTimestamp) assert(castedTimestamp == expectedTime * 1000L) val customDate = "31/01/2015" val dateOptions = new CSVOptions(Map("dateFormat" -> "dd/MM/yyyy"), false, "GMT") - val expectedDate = dateOptions.dateFormat.parse(customDate).getTime - val castedDate = - parser.makeConverter("_1", DateType, nullable = true, options = dateOptions) - .apply(customTimestamp) - assert(castedDate == DateTimeUtils.millisToDays(expectedDate)) + parser = new UnivocityParser(StructType(Seq.empty), dateOptions) + format = FastDateFormat.getInstance( + dateOptions.dateFormat, dateOptions.timeZone, dateOptions.locale) + val expectedDate = format.parse(customDate).getTime + val castedDate = parser.makeConverter("_1", DateType, nullable = true) + .apply(customDate) + assert(castedDate == DateTimeUtils.millisToDays(expectedDate, TimeZone.getTimeZone("GMT"))) val timestamp = "2015-01-01 00:00:00" - assert(parser.makeConverter("_1", TimestampType, options = options).apply(timestamp) == - DateTimeUtils.stringToTime(timestamp).getTime * 1000L) - assert(parser.makeConverter("_1", DateType, options = options).apply("2015-01-01") == - DateTimeUtils.millisToDays(DateTimeUtils.stringToTime("2015-01-01").getTime)) + timestampsOptions = new CSVOptions(Map( + "timestampFormat" -> "yyyy-MM-dd HH:mm:ss", + "dateFormat" -> "yyyy-MM-dd"), false, "UTC") + parser = new UnivocityParser(StructType(Seq.empty), timestampsOptions) + val expected = 1420070400 * DateTimeUtils.MICROS_PER_SECOND + assert(parser.makeConverter("_1", TimestampType).apply(timestamp) == + expected) + assert(parser.makeConverter("_1", DateType).apply("2015-01-01") == + expected / DateTimeUtils.MICROS_PER_DAY) } test("Throws exception for casting an invalid string to Float and Double Types") { val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val parser = new UnivocityParser(StructType(Seq.empty), options) val types = Seq(DoubleType, FloatType) val input = Seq("10u000", "abc", "1 2/3") types.foreach { dt => input.foreach { v => val message = intercept[NumberFormatException] { - parser.makeConverter("_1", dt, options = options).apply(v) + parser.makeConverter("_1", dt).apply(v) }.getMessage assert(message.contains(v)) } @@ -150,9 +161,9 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { test("Float NaN values are parsed correctly") { val options = new CSVOptions(Map("nanValue" -> "nn"), false, "GMT") + val parser = new UnivocityParser(StructType(Seq.empty), options) val floatVal: Float = parser.makeConverter( - "_1", FloatType, nullable = true, options = options - ).apply("nn").asInstanceOf[Float] + "_1", FloatType, nullable = true).apply("nn").asInstanceOf[Float] // Java implements the IEEE-754 floating point standard which guarantees that any comparison // against NaN will return false (except != 
which returns true) @@ -161,41 +172,41 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { test("Double NaN values are parsed correctly") { val options = new CSVOptions(Map("nanValue" -> "-"), false, "GMT") + val parser = new UnivocityParser(StructType(Seq.empty), options) val doubleVal: Double = parser.makeConverter( - "_1", DoubleType, nullable = true, options = options - ).apply("-").asInstanceOf[Double] + "_1", DoubleType, nullable = true).apply("-").asInstanceOf[Double] assert(doubleVal.isNaN) } test("Float infinite values can be parsed") { val negativeInfOptions = new CSVOptions(Map("negativeInf" -> "max"), false, "GMT") + var parser = new UnivocityParser(StructType(Seq.empty), negativeInfOptions) val floatVal1 = parser.makeConverter( - "_1", FloatType, nullable = true, options = negativeInfOptions - ).apply("max").asInstanceOf[Float] + "_1", FloatType, nullable = true).apply("max").asInstanceOf[Float] assert(floatVal1 == Float.NegativeInfinity) val positiveInfOptions = new CSVOptions(Map("positiveInf" -> "max"), false, "GMT") + parser = new UnivocityParser(StructType(Seq.empty), positiveInfOptions) val floatVal2 = parser.makeConverter( - "_1", FloatType, nullable = true, options = positiveInfOptions - ).apply("max").asInstanceOf[Float] + "_1", FloatType, nullable = true).apply("max").asInstanceOf[Float] assert(floatVal2 == Float.PositiveInfinity) } test("Double infinite values can be parsed") { val negativeInfOptions = new CSVOptions(Map("negativeInf" -> "max"), false, "GMT") + var parser = new UnivocityParser(StructType(Seq.empty), negativeInfOptions) val doubleVal1 = parser.makeConverter( - "_1", DoubleType, nullable = true, options = negativeInfOptions - ).apply("max").asInstanceOf[Double] + "_1", DoubleType, nullable = true).apply("max").asInstanceOf[Double] assert(doubleVal1 == Double.NegativeInfinity) val positiveInfOptions = new CSVOptions(Map("positiveInf" -> "max"), false, "GMT") + parser = new UnivocityParser(StructType(Seq.empty), positiveInfOptions) val doubleVal2 = parser.makeConverter( - "_1", DoubleType, nullable = true, options = positiveInfOptions - ).apply("max").asInstanceOf[Double] + "_1", DoubleType, nullable = true).apply("max").asInstanceOf[Double] assert(doubleVal2 == Double.PositiveInfinity) } @@ -211,7 +222,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { val options = new CSVOptions(Map("locale" -> langTag), false, "GMT") val parser = new UnivocityParser(new StructType().add("d", decimalType), options) - assert(parser.makeConverter("_1", decimalType, options = options).apply(input) === expected) + assert(parser.makeConverter("_1", decimalType).apply(input) === expected) } Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalParsing) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala index dfa0fe93a2f9..66d8d28988f8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala @@ -26,7 +26,7 @@ object DateTimeTestUtils { val ALL_TIMEZONES: Seq[TimeZone] = TimeZone.getAvailableIDs.toSeq.map(TimeZone.getTimeZone) - val outstandingTimezones: Seq[TimeZone] = Seq( + val outstandingTimezonesIds: Seq[String] = Seq( "UTC", "PST", "CET", @@ -34,7 +34,8 @@ object DateTimeTestUtils { "America/Los_Angeles", "Antarctica/Vostok", "Asia/Hong_Kong", - 
"Europe/Amsterdam").map(TimeZone.getTimeZone) + "Europe/Amsterdam") + val outstandingTimezones: Seq[TimeZone] = outstandingTimezonesIds.map(TimeZone.getTimeZone) def withDefaultTimeZone[T](newDefaultTimeZone: TimeZone)(block: => T): T = { val originalDefaultTimeZone = TimeZone.getDefault diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimeFormatterSuite.scala new file mode 100644 index 000000000000..02d4ee049060 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimeFormatterSuite.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.util + +import java.util.{Locale, TimeZone} + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeFormatter, DateTimeTestUtils} + +class DateTimeFormatterSuite extends SparkFunSuite { + test("parsing dates using time zones") { + val localDate = "2018-12-02" + val expectedDays = Map( + "UTC" -> 17867, + "PST" -> 17867, + "CET" -> 17866, + "Africa/Dakar" -> 17867, + "America/Los_Angeles" -> 17867, + "Antarctica/Vostok" -> 17866, + "Asia/Hong_Kong" -> 17866, + "Europe/Amsterdam" -> 17866) + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + val formatter = DateFormatter("yyyy-MM-dd", TimeZone.getTimeZone(timeZone), Locale.US) + val daysSinceEpoch = formatter.parse(localDate) + assert(daysSinceEpoch === expectedDays(timeZone)) + } + } + + test("parsing timestamps using time zones") { + val localDate = "2018-12-02T10:11:12.001234" + val expectedMicros = Map( + "UTC" -> 1543745472001234L, + "PST" -> 1543774272001234L, + "CET" -> 1543741872001234L, + "Africa/Dakar" -> 1543745472001234L, + "America/Los_Angeles" -> 1543774272001234L, + "Antarctica/Vostok" -> 1543723872001234L, + "Asia/Hong_Kong" -> 1543716672001234L, + "Europe/Amsterdam" -> 1543741872001234L) + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + val formatter = DateTimeFormatter( + "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", + TimeZone.getTimeZone(timeZone), + Locale.US) + val microsSinceEpoch = formatter.parse(localDate) + assert(microsSinceEpoch === expectedMicros(timeZone)) + } + } + + test("format dates using time zones") { + val daysSinceEpoch = 17867 + val expectedDate = Map( + "UTC" -> "2018-12-02", + "PST" -> "2018-12-01", + "CET" -> "2018-12-02", + "Africa/Dakar" -> "2018-12-02", + "America/Los_Angeles" -> "2018-12-01", + "Antarctica/Vostok" -> "2018-12-02", + "Asia/Hong_Kong" -> "2018-12-02", + "Europe/Amsterdam" -> "2018-12-02") + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + val formatter = DateFormatter("yyyy-MM-dd", TimeZone.getTimeZone(timeZone), Locale.US) + val date 
= formatter.format(daysSinceEpoch) + assert(date === expectedDate(timeZone)) + } + } + + test("format timestamps using time zones") { + val microsSinceEpoch = 1543745472001234L + val expectedTimestamp = Map( + "UTC" -> "2018-12-02T10:11:12.001234", + "PST" -> "2018-12-02T02:11:12.001234", + "CET" -> "2018-12-02T11:11:12.001234", + "Africa/Dakar" -> "2018-12-02T10:11:12.001234", + "America/Los_Angeles" -> "2018-12-02T02:11:12.001234", + "Antarctica/Vostok" -> "2018-12-02T16:11:12.001234", + "Asia/Hong_Kong" -> "2018-12-02T18:11:12.001234", + "Europe/Amsterdam" -> "2018-12-02T11:11:12.001234") + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + val formatter = DateTimeFormatter( + "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", + TimeZone.getTimeZone(timeZone), + Locale.US) + val timestamp = formatter.format(microsSinceEpoch) + assert(timestamp === expectedTimestamp(timeZone)) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala index 537d13b1bc8d..6b67fccf86b9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala @@ -53,7 +53,7 @@ class CsvFunctionsSuite extends QueryTest with SharedSQLContext { test("checking the columnNameOfCorruptRecord option") { val columnNameOfCorruptRecord = "_unparsed" val df = Seq("0,2013-111-11 12:13:14", "1,1983-08-04").toDS() - val schema = new StructType().add("a", IntegerType).add("b", TimestampType) + val schema = new StructType().add("a", IntegerType).add("b", DateType) val schemaWithCorrField1 = schema.add(columnNameOfCorruptRecord, StringType) val df2 = df .select(from_csv($"value", schemaWithCorrField1, Map( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index c9273193b642..3b977d74053e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -586,6 +586,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te val results = spark.read .format("csv") .options(Map("comment" -> "~", "header" -> "false", "inferSchema" -> "true")) + .option("timestampFormat", "yyyy-MM-dd HH:mm:ss") .load(testFile(commentsFile)) .collect() @@ -622,10 +623,11 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te val options = Map( "header" -> "true", "inferSchema" -> "false", - "dateFormat" -> "dd/MM/yyyy hh:mm") + "dateFormat" -> "dd/MM/yyyy HH:mm") val results = spark.read .format("csv") .options(options) + .option("timeZone", "UTC") .schema(customSchema) .load(testFile(datesFile)) .select("date") @@ -893,36 +895,38 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te } test("Write dates correctly in ISO8601 format by default") { - withTempDir { dir => - val customSchema = new StructType(Array(StructField("date", DateType, true))) - val iso8601datesPath = s"${dir.getCanonicalPath}/iso8601dates.csv" - val dates = spark.read - .format("csv") - .schema(customSchema) - .option("header", "true") - .option("inferSchema", "false") - .option("dateFormat", "dd/MM/yyyy HH:mm") - .load(testFile(datesFile)) - dates.write - .format("csv") - .option("header", "true") - .save(iso8601datesPath) + 
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { + withTempDir { dir => + val customSchema = new StructType(Array(StructField("date", DateType, true))) + val iso8601datesPath = s"${dir.getCanonicalPath}/iso8601dates.csv" + val dates = spark.read + .format("csv") + .schema(customSchema) + .option("header", "true") + .option("inferSchema", "false") + .option("dateFormat", "dd/MM/yyyy HH:mm") + .load(testFile(datesFile)) + dates.write + .format("csv") + .option("header", "true") + .save(iso8601datesPath) - // This will load back the dates as string. - val stringSchema = StructType(StructField("date", StringType, true) :: Nil) - val iso8601dates = spark.read - .format("csv") - .schema(stringSchema) - .option("header", "true") - .load(iso8601datesPath) + // This will load back the dates as string. + val stringSchema = StructType(StructField("date", StringType, true) :: Nil) + val iso8601dates = spark.read + .format("csv") + .schema(stringSchema) + .option("header", "true") + .load(iso8601datesPath) + + val iso8501 = FastDateFormat.getInstance("yyyy-MM-dd", Locale.US) + val expectedDates = dates.collect().map { r => + // This should be ISO8601 formatted string. + Row(iso8501.format(r.toSeq.head)) + } - val iso8501 = FastDateFormat.getInstance("yyyy-MM-dd", Locale.US) - val expectedDates = dates.collect().map { r => - // This should be ISO8601 formatted string. - Row(iso8501.format(r.toSeq.head)) + checkAnswer(iso8601dates, expectedDates) } - - checkAnswer(iso8601dates, expectedDates) } } @@ -1107,7 +1111,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te test("SPARK-18699 put malformed records in a `columnNameOfCorruptRecord` field") { Seq(false, true).foreach { multiLine => - val schema = new StructType().add("a", IntegerType).add("b", TimestampType) + val schema = new StructType().add("a", IntegerType).add("b", DateType) // We use `PERMISSIVE` mode by default if invalid string is given. val df1 = spark .read @@ -1139,7 +1143,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te val schemaWithCorrField2 = new StructType() .add("a", IntegerType) .add(columnNameOfCorruptRecord, StringType) - .add("b", TimestampType) + .add("b", DateType) val df3 = spark .read .option("mode", "permissive") @@ -1325,7 +1329,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te val columnNameOfCorruptRecord = "_corrupt_record" val schema = new StructType() .add("a", IntegerType) - .add("b", TimestampType) + .add("b", DateType) .add(columnNameOfCorruptRecord, StringType) // negative cases val msg = intercept[AnalysisException] { From 556d83e0d87a8f899f29544eb5ca4999a84c96c1 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Tue, 4 Dec 2018 10:33:27 -0800 Subject: [PATCH 0069/1072] [SPARK-26233][SQL] CheckOverflow when encoding a decimal value ## What changes were proposed in this pull request? When we encode a Decimal from external source we don't check for overflow. That method is useful not only in order to enforce that we can represent the correct value in the specified range, but it also changes the underlying data to the right precision/scale. Since in our code generation we assume that a decimal has exactly the same precision and scale of its data type, missing to enforce it can lead to corrupted output/results when there are subsequent transformations. ## How was this patch tested? added UT Closes #23210 from mgaido91/SPARK-26233. 
Authored-by: Marco Gaido Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/catalyst/encoders/RowEncoder.scala | 4 ++-- .../test/scala/org/apache/spark/sql/DatasetSuite.scala | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index d905f8f9858e..8ca3d356f3bd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -106,12 +106,12 @@ object RowEncoder { returnNullable = false) case d: DecimalType => - StaticInvoke( + CheckOverflow(StaticInvoke( Decimal.getClass, d, "fromDecimal", inputObject :: Nil, - returnNullable = false) + returnNullable = false), d) case StringType => StaticInvoke( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 0f900833d2cf..525c7cef3956 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1647,6 +1647,15 @@ class DatasetSuite extends QueryTest with SharedSQLContext { checkDataset(ds, data: _*) checkAnswer(ds.select("x"), Seq(Row(1), Row(2))) } + + test("SPARK-26233: serializer should enforce decimal precision and scale") { + val s = StructType(Seq(StructField("a", StringType), StructField("b", DecimalType(38, 8)))) + val encoder = RowEncoder(s) + implicit val uEnc = encoder + val df = spark.range(2).map(l => Row(l.toString, BigDecimal.valueOf(l + 0.1111))) + checkAnswer(df.groupBy(col("a")).agg(first(col("b"))), + Seq(Row("0", BigDecimal.valueOf(0.1111)), Row("1", BigDecimal.valueOf(1.1111)))) + } } case class TestDataUnion(x: Int, y: Int, z: Int) From 35f9163adf5c067229afbe57ed60d5dd5f2422c8 Mon Sep 17 00:00:00 2001 From: Shahid Date: Tue, 4 Dec 2018 11:00:58 -0800 Subject: [PATCH 0070/1072] [SPARK-26119][CORE][WEBUI] Task summary table should contain only successful tasks' metrics ## What changes were proposed in this pull request? Task summary table in the stage page currently displays the summary of all the tasks. However, we should display the task summary of only successful tasks, to follow the behavior of previous versions of spark. ## How was this patch tested? Added UT. attached screenshot Before patch: ![screenshot from 2018-11-20 00-36-18](https://user-images.githubusercontent.com/23054875/48729339-62e3a580-ec5d-11e8-81f0-0d191a234ffe.png) ![screenshot from 2018-11-20 01-18-37](https://user-images.githubusercontent.com/23054875/48731112-41d18380-ec62-11e8-8c31-1ffbfa04e746.png) Closes #23088 from shahidki31/summaryMetrics. Authored-by: Shahid Signed-off-by: Marcelo Vanzin --- .../apache/spark/status/AppStatusStore.scala | 73 +++++++++++++------ .../spark/status/AppStatusStoreSuite.scala | 33 ++++++++- 2 files changed, 81 insertions(+), 25 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index 5c0ed4d5d8f4..b35781cb36e8 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -148,11 +148,20 @@ private[spark] class AppStatusStore( // cheaper for disk stores (avoids deserialization). 
val count = { Utils.tryWithResource( - store.view(classOf[TaskDataWrapper]) - .parent(stageKey) - .index(TaskIndexNames.EXEC_RUN_TIME) - .first(0L) - .closeableIterator() + if (store.isInstanceOf[InMemoryStore]) { + store.view(classOf[TaskDataWrapper]) + .parent(stageKey) + .index(TaskIndexNames.STATUS) + .first("SUCCESS") + .last("SUCCESS") + .closeableIterator() + } else { + store.view(classOf[TaskDataWrapper]) + .parent(stageKey) + .index(TaskIndexNames.EXEC_RUN_TIME) + .first(0L) + .closeableIterator() + } ) { it => var _count = 0L while (it.hasNext()) { @@ -221,30 +230,50 @@ private[spark] class AppStatusStore( // stabilize once the stage finishes. It's also slow, especially with disk stores. val indices = quantiles.map { q => math.min((q * count).toLong, count - 1) } + // TODO: Summary metrics needs to display all the successful tasks' metrics (SPARK-26119). + // For InMemory case, it is efficient to find using the following code. But for diskStore case + // we need an efficient solution to avoid deserialization time overhead. For that, we need to + // rework on the way indexing works, so that we can index by specific metrics for successful + // and failed tasks differently (would be tricky). Also would require changing the disk store + // version (to invalidate old stores). def scanTasks(index: String)(fn: TaskDataWrapper => Long): IndexedSeq[Double] = { - Utils.tryWithResource( - store.view(classOf[TaskDataWrapper]) + if (store.isInstanceOf[InMemoryStore]) { + val quantileTasks = store.view(classOf[TaskDataWrapper]) .parent(stageKey) .index(index) .first(0L) - .closeableIterator() - ) { it => - var last = Double.NaN - var currentIdx = -1L - indices.map { idx => - if (idx == currentIdx) { - last - } else { - val diff = idx - currentIdx - currentIdx = idx - if (it.skip(diff - 1)) { - last = fn(it.next()).toDouble + .asScala + .filter { _.status == "SUCCESS"} // Filter "SUCCESS" tasks + .toIndexedSeq + + indices.map { index => + fn(quantileTasks(index.toInt)).toDouble + }.toIndexedSeq + } else { + Utils.tryWithResource( + store.view(classOf[TaskDataWrapper]) + .parent(stageKey) + .index(index) + .first(0L) + .closeableIterator() + ) { it => + var last = Double.NaN + var currentIdx = -1L + indices.map { idx => + if (idx == currentIdx) { last } else { - Double.NaN + val diff = idx - currentIdx + currentIdx = idx + if (it.skip(diff - 1)) { + last = fn(it.next()).toDouble + last + } else { + Double.NaN + } } - } - }.toIndexedSeq + }.toIndexedSeq + } } } diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala index 92f90f3d96dd..75a658161d3f 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala @@ -77,6 +77,34 @@ class AppStatusStoreSuite extends SparkFunSuite { assert(store.count(classOf[CachedQuantile]) === 2) } + test("only successfull task have taskSummary") { + val store = new InMemoryStore() + (0 until 5).foreach { i => store.write(newTaskData(i, status = "FAILED")) } + val appStore = new AppStatusStore(store).taskSummary(stageId, attemptId, uiQuantiles) + assert(appStore.size === 0) + } + + test("summary should contain task metrics of only successfull tasks") { + val store = new InMemoryStore() + + for (i <- 0 to 5) { + if (i % 2 == 1) { + store.write(newTaskData(i, status = "FAILED")) + } else { + store.write(newTaskData(i)) + } + } + + val summary = new 
AppStatusStore(store).taskSummary(stageId, attemptId, uiQuantiles).get + + val values = Array(0.0, 2.0, 4.0) + + val dist = new Distribution(values, 0, values.length).getQuantiles(uiQuantiles.sorted) + dist.zip(summary.executorRunTime).foreach { case (expected, actual) => + assert(expected === actual) + } + } + private def compareQuantiles(count: Int, quantiles: Array[Double]): Unit = { val store = new InMemoryStore() val values = (0 until count).map { i => @@ -93,12 +121,11 @@ class AppStatusStoreSuite extends SparkFunSuite { } } - private def newTaskData(i: Int): TaskDataWrapper = { + private def newTaskData(i: Int, status: String = "SUCCESS"): TaskDataWrapper = { new TaskDataWrapper( - i, i, i, i, i, i, i.toString, i.toString, i.toString, i.toString, false, Nil, None, + i, i, i, i, i, i, i.toString, i.toString, status, i.toString, false, Nil, None, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, stageId, attemptId) } - } From 180f969c97a66b4c265e5fad8272665a00572f1a Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Tue, 4 Dec 2018 14:35:04 -0800 Subject: [PATCH 0071/1072] [SPARK-26094][CORE][STREAMING] createNonEcFile creates parent dirs. ## What changes were proposed in this pull request? We explicitly avoid files with hdfs erasure coding for the streaming WAL and for event logs, as hdfs EC does not support all relevant apis. However, the new builder api used has different semantics -- it does not create parent dirs, and it does not resolve relative paths. This updates createNonEcFile to have similar semantics to the old api. ## How was this patch tested? Ran tests with the WAL pointed at a non-existent dir, which failed before this change. Manually tested the new function with a relative path as well. Unit tests via jenkins. Closes #23092 from squito/SPARK-26094. Authored-by: Imran Rashid Signed-off-by: Marcelo Vanzin --- .../scala/org/apache/spark/deploy/SparkHadoopUtil.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 7bb2a419107d..937199273dab 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -466,7 +466,13 @@ private[spark] object SparkHadoopUtil { try { // Use reflection as this uses apis only avialable in hadoop 3 val builderMethod = fs.getClass().getMethod("createFile", classOf[Path]) - val builder = builderMethod.invoke(fs, path) + // the builder api does not resolve relative paths, nor does it create parent dirs, while + // the old api does. + if (!fs.mkdirs(path.getParent())) { + throw new IOException(s"Failed to create parents of $path") + } + val qualifiedPath = fs.makeQualified(path) + val builder = builderMethod.invoke(fs, qualifiedPath) val builderCls = builder.getClass() // this may throw a NoSuchMethodException if the path is not on hdfs val replicateMethod = builderCls.getMethod("replicate") From 7143e9d7220bd98ceb82c5c5f045108a8a664ec1 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Wed, 5 Dec 2018 09:12:24 +0800 Subject: [PATCH 0072/1072] [SPARK-25829][SQL][FOLLOWUP] Refactor MapConcat in order to check properly the limit size ## What changes were proposed in this pull request? 
The PR starts from the [comment](https://github.com/apache/spark/pull/23124#discussion_r236112390) in the main one and it aims at: - simplifying the code for `MapConcat`; - be more precise in checking the limit size. ## How was this patch tested? existing tests Closes #23217 from mgaido91/SPARK-25829_followup. Authored-by: Marco Gaido Signed-off-by: Wenchen Fan --- .../expressions/collectionOperations.scala | 77 +------------------ .../catalyst/util/ArrayBasedMapBuilder.scala | 10 +++ 2 files changed, 12 insertions(+), 75 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index fa8e38acd522..67f6739b1e18 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -554,13 +554,6 @@ case class MapConcat(children: Seq[Expression]) extends ComplexTypeMergingExpres return null } - val numElements = maps.foldLeft(0L)((sum, ad) => sum + ad.numElements()) - if (numElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { - throw new RuntimeException(s"Unsuccessful attempt to concat maps with $numElements " + - s"elements due to exceeding the map size limit " + - s"${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.") - } - for (map <- maps) { mapBuilder.putAll(map.keyArray(), map.valueArray()) } @@ -569,8 +562,6 @@ case class MapConcat(children: Seq[Expression]) extends ComplexTypeMergingExpres override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val mapCodes = children.map(_.genCode(ctx)) - val keyType = dataType.keyType - val valueType = dataType.valueType val argsName = ctx.freshName("args") val hasNullName = ctx.freshName("hasNull") val builderTerm = ctx.addReferenceObj("mapBuilder", mapBuilder) @@ -610,41 +601,12 @@ case class MapConcat(children: Seq[Expression]) extends ComplexTypeMergingExpres ) val idxName = ctx.freshName("idx") - val numElementsName = ctx.freshName("numElems") - val finKeysName = ctx.freshName("finalKeys") - val finValsName = ctx.freshName("finalValues") - - val keyConcat = genCodeForArrays(ctx, keyType, false) - - val valueConcat = - if (valueType.sameType(keyType) && - !(CodeGenerator.isPrimitiveType(valueType) && dataType.valueContainsNull)) { - keyConcat - } else { - genCodeForArrays(ctx, valueType, dataType.valueContainsNull) - } - - val keyArgsName = ctx.freshName("keyArgs") - val valArgsName = ctx.freshName("valArgs") - val mapMerge = s""" - |ArrayData[] $keyArgsName = new ArrayData[${mapCodes.size}]; - |ArrayData[] $valArgsName = new ArrayData[${mapCodes.size}]; - |long $numElementsName = 0; |for (int $idxName = 0; $idxName < $argsName.length; $idxName++) { - | $keyArgsName[$idxName] = $argsName[$idxName].keyArray(); - | $valArgsName[$idxName] = $argsName[$idxName].valueArray(); - | $numElementsName += $argsName[$idxName].numElements(); + | $builderTerm.putAll($argsName[$idxName].keyArray(), $argsName[$idxName].valueArray()); |} - |if ($numElementsName > ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}) { - | throw new RuntimeException("Unsuccessful attempt to concat maps with " + - | $numElementsName + " elements due to exceeding the map size limit " + - | "${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}."); - |} - |ArrayData $finKeysName = $keyConcat($keyArgsName, (int) $numElementsName); - |ArrayData $finValsName = 
$valueConcat($valArgsName, (int) $numElementsName); - |${ev.value} = $builderTerm.from($finKeysName, $finValsName); + |${ev.value} = $builderTerm.build(); """.stripMargin ev.copy( @@ -660,41 +622,6 @@ case class MapConcat(children: Seq[Expression]) extends ComplexTypeMergingExpres """.stripMargin) } - private def genCodeForArrays( - ctx: CodegenContext, - elementType: DataType, - checkForNull: Boolean): String = { - val counter = ctx.freshName("counter") - val arrayData = ctx.freshName("arrayData") - val argsName = ctx.freshName("args") - val numElemName = ctx.freshName("numElements") - val y = ctx.freshName("y") - val z = ctx.freshName("z") - - val allocation = CodeGenerator.createArrayData( - arrayData, elementType, numElemName, s" $prettyName failed.") - val assignment = CodeGenerator.createArrayAssignment( - arrayData, elementType, s"$argsName[$y]", counter, z, checkForNull) - - val concat = ctx.freshName("concat") - val concatDef = - s""" - |private ArrayData $concat(ArrayData[] $argsName, int $numElemName) { - | $allocation - | int $counter = 0; - | for (int $y = 0; $y < ${children.length}; $y++) { - | for (int $z = 0; $z < $argsName[$y].numElements(); $z++) { - | $assignment - | $counter++; - | } - | } - | return $arrayData; - |} - """.stripMargin - - ctx.addNewFunction(concat, concatDef) - } - override def prettyName: String = "map_concat" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala index e7cd61655dc9..98934368205e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala @@ -21,6 +21,7 @@ import scala.collection.mutable import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.array.ByteArrayMethods /** * A builder of [[ArrayBasedMapData]], which fails if a null map key is detected, and removes @@ -54,6 +55,10 @@ class ArrayBasedMapBuilder(keyType: DataType, valueType: DataType) extends Seria val index = keyToIndex.getOrDefault(key, -1) if (index == -1) { + if (size >= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { + throw new RuntimeException(s"Unsuccessful attempt to build maps with $size elements " + + s"due to exceeding the map size limit ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.") + } keyToIndex.put(key, values.length) keys.append(key) values.append(value) @@ -117,4 +122,9 @@ class ArrayBasedMapBuilder(keyType: DataType, valueType: DataType) extends Seria build() } } + + /** + * Returns the current size of the map which is going to be produced by the current builder. + */ + def size: Int = keys.size } From 7e3eb3cd209d83394ca2b2cec79b26b1bbe9d7ea Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 5 Dec 2018 15:22:08 +0800 Subject: [PATCH 0073/1072] [SPARK-26252][PYTHON] Add support to run specific unittests and/or doctests in python/run-tests script ## What changes were proposed in this pull request? This PR proposes add a developer option, `--testnames`, to our testing script to allow run specific set of unittests and doctests. **1. Run unittests in the class** ```bash ./run-tests --testnames 'pyspark.sql.tests.test_arrow ArrowTests' ``` ``` Running PySpark tests. 
Output is in /.../spark/python/unit-tests.log Will test against the following Python executables: ['python2.7', 'pypy'] Will test the following Python tests: ['pyspark.sql.tests.test_arrow ArrowTests'] Starting test(python2.7): pyspark.sql.tests.test_arrow ArrowTests Starting test(pypy): pyspark.sql.tests.test_arrow ArrowTests Finished test(python2.7): pyspark.sql.tests.test_arrow ArrowTests (14s) Finished test(pypy): pyspark.sql.tests.test_arrow ArrowTests (14s) ... 22 tests were skipped Tests passed in 14 seconds Skipped tests in pyspark.sql.tests.test_arrow ArrowTests with pypy: test_createDataFrame_column_name_encoding (pyspark.sql.tests.test_arrow.ArrowTests) ... skipped 'Pandas >= 0.19.2 must be installed; however, it was not found.' test_createDataFrame_does_not_modify_input (pyspark.sql.tests.test_arrow.ArrowTests) ... skipped 'Pandas >= 0.19.2 must be installed; however, it was not found.' test_createDataFrame_fallback_disabled (pyspark.sql.tests.test_arrow.ArrowTests) ... skipped 'Pandas >= 0.19.2 must be installed; however, it was not found.' test_createDataFrame_fallback_enabled (pyspark.sql.tests.test_arrow.ArrowTests) ... skipped ... ``` **2. Run single unittest in the class.** ```bash ./run-tests --testnames 'pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion' ``` ``` Running PySpark tests. Output is in /.../spark/python/unit-tests.log Will test against the following Python executables: ['python2.7', 'pypy'] Will test the following Python tests: ['pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion'] Starting test(pypy): pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion Starting test(python2.7): pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion Finished test(pypy): pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion (0s) ... 1 tests were skipped Finished test(python2.7): pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion (8s) Tests passed in 8 seconds Skipped tests in pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion with pypy: test_null_conversion (pyspark.sql.tests.test_arrow.ArrowTests) ... skipped 'Pandas >= 0.19.2 must be installed; however, it was not found.' ``` **3. Run doctests in single PySpark module.** ```bash ./run-tests --testnames pyspark.sql.dataframe ``` ``` Running PySpark tests. Output is in /.../spark/python/unit-tests.log Will test against the following Python executables: ['python2.7', 'pypy'] Will test the following Python tests: ['pyspark.sql.dataframe'] Starting test(pypy): pyspark.sql.dataframe Starting test(python2.7): pyspark.sql.dataframe Finished test(python2.7): pyspark.sql.dataframe (47s) Finished test(pypy): pyspark.sql.dataframe (48s) Tests passed in 48 seconds ``` Of course, you can mix them: ```bash ./run-tests --testnames 'pyspark.sql.tests.test_arrow ArrowTests,pyspark.sql.dataframe' ``` ``` Running PySpark tests. Output is in /.../spark/python/unit-tests.log Will test against the following Python executables: ['python2.7', 'pypy'] Will test the following Python tests: ['pyspark.sql.tests.test_arrow ArrowTests', 'pyspark.sql.dataframe'] Starting test(pypy): pyspark.sql.dataframe Starting test(pypy): pyspark.sql.tests.test_arrow ArrowTests Starting test(python2.7): pyspark.sql.dataframe Starting test(python2.7): pyspark.sql.tests.test_arrow ArrowTests Finished test(pypy): pyspark.sql.tests.test_arrow ArrowTests (0s) ... 
22 tests were skipped Finished test(python2.7): pyspark.sql.tests.test_arrow ArrowTests (18s) Finished test(python2.7): pyspark.sql.dataframe (50s) Finished test(pypy): pyspark.sql.dataframe (52s) Tests passed in 52 seconds Skipped tests in pyspark.sql.tests.test_arrow ArrowTests with pypy: test_createDataFrame_column_name_encoding (pyspark.sql.tests.test_arrow.ArrowTests) ... skipped 'Pandas >= 0.19.2 must be installed; however, it was not found.' test_createDataFrame_does_not_modify_input (pyspark.sql.tests.test_arrow.ArrowTests) ... skipped 'Pandas >= 0.19.2 must be installed; however, it was not found.' test_createDataFrame_fallback_disabled (pyspark.sql.tests.test_arrow.ArrowTests) ... skipped 'Pandas >= 0.19.2 must be installed; however, it was not found.' ``` and also you can use all other options (except `--modules`, which will be ignored) ```bash ./run-tests --testnames 'pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion' --python-executables=python ``` ``` Running PySpark tests. Output is in /.../spark/python/unit-tests.log Will test against the following Python executables: ['python'] Will test the following Python tests: ['pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion'] Starting test(python): pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion Finished test(python): pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion (12s) Tests passed in 12 seconds ``` See help below: ```bash ./run-tests --help ``` ``` Usage: run-tests [options] Options: ... Developer Options: --testnames=TESTNAMES A comma-separated list of specific modules, classes and functions of doctest or unittest to test. For example, 'pyspark.sql.foo' to run the module as unittests or doctests, 'pyspark.sql.tests FooTests' to run the specific class of unittests, 'pyspark.sql.tests FooTests.test_foo' to run the specific unittest in the class. '--modules' option is ignored if they are given. ``` I intentionally grouped it as a developer option to be more conservative. ## How was this patch tested? Manually tested. Negative tests were also done. ```bash ./run-tests --testnames 'pyspark.sql.tests.test_arrow ArrowTests.test_null_conversion1' --python-executables=python ``` ``` ... AttributeError: type object 'ArrowTests' has no attribute 'test_null_conversion1' ... ``` ```bash ./run-tests --testnames 'pyspark.sql.tests.test_arrow ArrowT' --python-executables=python ``` ``` ... AttributeError: 'module' object has no attribute 'ArrowT' ... ``` ```bash ./run-tests --testnames 'pyspark.sql.tests.test_ar' --python-executables=python ``` ``` ... /.../python2.7: No module named pyspark.sql.tests.test_ar ``` Closes #23203 from HyukjinKwon/SPARK-26252. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- python/run-tests-with-coverage | 2 - python/run-tests.py | 68 +++++++++++++++++++++++----------- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/python/run-tests-with-coverage b/python/run-tests-with-coverage index 6d74b563e914..457821037d43 100755 --- a/python/run-tests-with-coverage +++ b/python/run-tests-with-coverage @@ -50,8 +50,6 @@ export SPARK_CONF_DIR="$COVERAGE_DIR/conf" # This environment variable enables the coverage. 
export COVERAGE_PROCESS_START="$FWDIR/.coveragerc" -# If you'd like to run a specific unittest class, you could do such as -# SPARK_TESTING=1 ../bin/pyspark pyspark.sql.tests VectorizedUDFTests ./run-tests "$@" # Don't run coverage for the coverage command itself diff --git a/python/run-tests.py b/python/run-tests.py index 01a6e81264dd..e45268c13769 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -19,7 +19,7 @@ from __future__ import print_function import logging -from optparse import OptionParser +from optparse import OptionParser, OptionGroup import os import re import shutil @@ -99,7 +99,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): try: per_test_output = tempfile.TemporaryFile() retcode = subprocess.Popen( - [os.path.join(SPARK_HOME, "bin/pyspark"), test_name], + [os.path.join(SPARK_HOME, "bin/pyspark")] + test_name.split(), stderr=per_test_output, stdout=per_test_output, env=env).wait() shutil.rmtree(tmp_dir, ignore_errors=True) except: @@ -190,6 +190,20 @@ def parse_opts(): help="Enable additional debug logging" ) + group = OptionGroup(parser, "Developer Options") + group.add_option( + "--testnames", type="string", + default=None, + help=( + "A comma-separated list of specific modules, classes and functions of doctest " + "or unittest to test. " + "For example, 'pyspark.sql.foo' to run the module as unittests or doctests, " + "'pyspark.sql.tests FooTests' to run the specific class of unittests, " + "'pyspark.sql.tests FooTests.test_foo' to run the specific unittest in the class. " + "'--modules' option is ignored if they are given.") + ) + parser.add_option_group(group) + (opts, args) = parser.parse_args() if args: parser.error("Unsupported arguments: %s" % ' '.join(args)) @@ -213,25 +227,31 @@ def _check_coverage(python_exec): def main(): opts = parse_opts() - if (opts.verbose): + if opts.verbose: log_level = logging.DEBUG else: log_level = logging.INFO + should_test_modules = opts.testnames is None logging.basicConfig(stream=sys.stdout, level=log_level, format="%(message)s") LOGGER.info("Running PySpark tests. Output is in %s", LOG_FILE) if os.path.exists(LOG_FILE): os.remove(LOG_FILE) python_execs = opts.python_executables.split(',') - modules_to_test = [] - for module_name in opts.modules.split(','): - if module_name in python_modules: - modules_to_test.append(python_modules[module_name]) - else: - print("Error: unrecognized module '%s'. Supported modules: %s" % - (module_name, ", ".join(python_modules))) - sys.exit(-1) LOGGER.info("Will test against the following Python executables: %s", python_execs) - LOGGER.info("Will test the following Python modules: %s", [x.name for x in modules_to_test]) + + if should_test_modules: + modules_to_test = [] + for module_name in opts.modules.split(','): + if module_name in python_modules: + modules_to_test.append(python_modules[module_name]) + else: + print("Error: unrecognized module '%s'. 
Supported modules: %s" % + (module_name, ", ".join(python_modules))) + sys.exit(-1) + LOGGER.info("Will test the following Python modules: %s", [x.name for x in modules_to_test]) + else: + testnames_to_test = opts.testnames.split(',') + LOGGER.info("Will test the following Python tests: %s", testnames_to_test) task_queue = Queue.PriorityQueue() for python_exec in python_execs: @@ -246,16 +266,20 @@ def main(): LOGGER.debug("%s python_implementation is %s", python_exec, python_implementation) LOGGER.debug("%s version is: %s", python_exec, subprocess_check_output( [python_exec, "--version"], stderr=subprocess.STDOUT, universal_newlines=True).strip()) - for module in modules_to_test: - if python_implementation not in module.blacklisted_python_implementations: - for test_goal in module.python_test_goals: - heavy_tests = ['pyspark.streaming.tests', 'pyspark.mllib.tests', - 'pyspark.tests', 'pyspark.sql.tests', 'pyspark.ml.tests'] - if any(map(lambda prefix: test_goal.startswith(prefix), heavy_tests)): - priority = 0 - else: - priority = 100 - task_queue.put((priority, (python_exec, test_goal))) + if should_test_modules: + for module in modules_to_test: + if python_implementation not in module.blacklisted_python_implementations: + for test_goal in module.python_test_goals: + heavy_tests = ['pyspark.streaming.tests', 'pyspark.mllib.tests', + 'pyspark.tests', 'pyspark.sql.tests', 'pyspark.ml.tests'] + if any(map(lambda prefix: test_goal.startswith(prefix), heavy_tests)): + priority = 0 + else: + priority = 100 + task_queue.put((priority, (python_exec, test_goal))) + else: + for test_goal in testnames_to_test: + task_queue.put((0, (python_exec, test_goal))) # Create the target directory before starting tasks to avoid races. target_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'target')) From 169d9ad8f1b6006c8db0edbdfffc20dc73c78610 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 5 Dec 2018 19:30:25 +0800 Subject: [PATCH 0074/1072] [SPARK-26133][ML][FOLLOWUP] Fix doc for OneHotEncoder ## What changes were proposed in this pull request? This fixes doc of renamed OneHotEncoder in PySpark. ## How was this patch tested? N/A Closes #23230 from viirya/remove_one_hot_encoder_followup. Authored-by: Liang-Chi Hsieh Signed-off-by: Hyukjin Kwon --- python/pyspark/ml/feature.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 6cc80e181e5e..c9507c20918e 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1648,22 +1648,22 @@ class OneHotEncoder(JavaEstimator, HasInputCols, HasOutputCols, HasHandleInvalid at most a single one-value per row that indicates the input category index. For example with 5 categories, an input value of 2.0 would map to an output vector of `[0.0, 0.0, 1.0, 0.0]`. - The last category is not included by default (configurable via `dropLast`), + The last category is not included by default (configurable via :py:attr:`dropLast`), because it makes the vector entries sum up to one, and hence linearly dependent. So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`. - Note: This is different from scikit-learn's OneHotEncoder, which keeps all categories. - The output vectors are sparse. + .. note:: This is different from scikit-learn's OneHotEncoder, which keeps all categories. + The output vectors are sparse. - When `handleInvalid` is configured to 'keep', an extra "category" indicating invalid values is - added as last category. 
So when `dropLast` is true, invalid values are encoded as all-zeros - vector. + When :py:attr:`handleInvalid` is configured to 'keep', an extra "category" indicating invalid + values is added as last category. So when :py:attr:`dropLast` is true, invalid values are + encoded as all-zeros vector. - Note: When encoding multi-column by using `inputCols` and `outputCols` params, input/output - cols come in pairs, specified by the order in the arrays, and each pair is treated - independently. + .. note:: When encoding multi-column by using :py:attr:`inputCols` and + :py:attr:`outputCols` params, input/output cols come in pairs, specified by the order in + the arrays, and each pair is treated independently. - See `StringIndexer` for converting categorical values into category indices + .. seealso:: :py:class:`StringIndexer` for converting categorical values into category indices >>> from pyspark.ml.linalg import Vectors >>> df = spark.createDataFrame([(0.0,), (1.0,), (2.0,)], ["input"]) @@ -1671,7 +1671,7 @@ class OneHotEncoder(JavaEstimator, HasInputCols, HasOutputCols, HasHandleInvalid >>> model = ohe.fit(df) >>> model.transform(df).head().output SparseVector(2, {0: 1.0}) - >>> ohePath = temp_path + "/oheEstimator" + >>> ohePath = temp_path + "/ohe" >>> ohe.save(ohePath) >>> loadedOHE = OneHotEncoder.load(ohePath) >>> loadedOHE.getInputCols() == ohe.getInputCols() From 7bb1dab8a006531d612e21d888c7fc6911990017 Mon Sep 17 00:00:00 2001 From: caoxuewen Date: Wed, 5 Dec 2018 23:10:48 +0800 Subject: [PATCH 0075/1072] [SPARK-26271][FOLLOW-UP][SQL] remove unuse object SparkPlan ## What changes were proposed in this pull request? this code come from PR: https://github.com/apache/spark/pull/11190, but this code has never been used, only since PR: https://github.com/apache/spark/pull/14548, Let's continue fix it. thanks. ## How was this patch tested? N / A Closes #23227 from heary-cao/unuseSparkPlan. Authored-by: caoxuewen Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/execution/SparkPlan.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index 9d9b020309d9..a89ccca99d05 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -423,11 +423,6 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ } } -object SparkPlan { - private[execution] val subqueryExecutionContext = ExecutionContext.fromExecutorService( - ThreadUtils.newDaemonCachedThreadPool("subquery", 16)) -} - trait LeafExecNode extends SparkPlan { override final def children: Seq[SparkPlan] = Nil override def producedAttributes: AttributeSet = outputSet From dd518a196c2d40ae48034b8b0950d1c8045c02ed Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 5 Dec 2018 23:43:03 +0800 Subject: [PATCH 0076/1072] [SPARK-26151][SQL][FOLLOWUP] Return partial results for bad CSV records ## What changes were proposed in this pull request? Updated SQL migration guide according to changes in https://github.com/apache/spark/pull/23120 Closes #23235 from MaxGekk/failuresafe-partial-result-followup. 
Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Wenchen Fan --- docs/sql-migration-guide-upgrade.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index fee0e6df7177..ed2ff139bcc3 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -35,6 +35,8 @@ displayTitle: Spark SQL Upgrading Guide - Since Spark 3.0, CSV datasource uses java.time API for parsing and generating CSV content. New formatting implementation supports date/timestamp patterns conformed to ISO 8601. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. + - In Spark version 2.4 and earlier, CSV datasource converts a malformed CSV string to a row with all `null`s in the PERMISSIVE mode. Since Spark 3.0, returned row can contain non-`null` fields if some of CSV column values were parsed and converted to desired types successfully. + ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some change in behavior and are illustrated in the table below. From ab76900fedc05df7080c9b6c81d65a3f260c1c26 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 6 Dec 2018 09:14:46 +0800 Subject: [PATCH 0077/1072] [SPARK-26275][PYTHON][ML] Increases timeout for StreamingLogisticRegressionWithSGDTests.test_training_and_prediction test ## What changes were proposed in this pull request? Looks this test is flaky https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/99704/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/99569/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/99644/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/99548/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/99454/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/99609/console ``` ====================================================================== FAIL: test_training_and_prediction (pyspark.mllib.tests.test_streaming_algorithms.StreamingLogisticRegressionWithSGDTests) Test that the model improves on toy data with no. of batches ---------------------------------------------------------------------- Traceback (most recent call last): File "/home/jenkins/workspace/SparkPullRequestBuilder/python/pyspark/mllib/tests/test_streaming_algorithms.py", line 367, in test_training_and_prediction self._eventually(condition) File "/home/jenkins/workspace/SparkPullRequestBuilder/python/pyspark/mllib/tests/test_streaming_algorithms.py", line 78, in _eventually % (timeout, lastValue)) AssertionError: Test failed due to timeout after 30 sec, with last condition returning: Latest errors: 0.67, 0.71, 0.78, 0.7, 0.75, 0.74, 0.73, 0.69, 0.62, 0.71, 0.69, 0.75, 0.72, 0.77, 0.71, 0.74 ---------------------------------------------------------------------- Ran 13 tests in 185.051s FAILED (failures=1, skipped=1) ``` This looks happening after increasing the parallelism in Jenkins to speed up at https://github.com/apache/spark/pull/23111. 
I am able to reproduce this manually when the resource usage is heavy (with manual decrease of timeout). ## How was this patch tested? Manually tested by ``` cd python ./run-tests --testnames 'pyspark.mllib.tests.test_streaming_algorithms StreamingLogisticRegressionWithSGDTests.test_training_and_prediction' --python-executables=python ``` Closes #23236 from HyukjinKwon/SPARK-26275. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- python/pyspark/mllib/tests/test_streaming_algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/mllib/tests/test_streaming_algorithms.py b/python/pyspark/mllib/tests/test_streaming_algorithms.py index 4bc8904acd31..bf2ad2d267bb 100644 --- a/python/pyspark/mllib/tests/test_streaming_algorithms.py +++ b/python/pyspark/mllib/tests/test_streaming_algorithms.py @@ -364,7 +364,7 @@ def condition(): return True return "Latest errors: " + ", ".join(map(lambda x: str(x), errors)) - self._eventually(condition) + self._eventually(condition, timeout=60.0) class StreamingLinearRegressionWithTests(MLLibStreamingTestCase): From ecaa495b1fe532c36e952ccac42f4715809476af Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 6 Dec 2018 10:07:28 -0800 Subject: [PATCH 0078/1072] [SPARK-25274][PYTHON][SQL] In toPandas with Arrow send un-ordered record batches to improve performance ## What changes were proposed in this pull request? When executing `toPandas` with Arrow enabled, partitions that arrive in the JVM out-of-order must be buffered before they can be send to Python. This causes an excess of memory to be used in the driver JVM and increases the time it takes to complete because data must sit in the JVM waiting for preceding partitions to come in. This change sends un-ordered partitions to Python as soon as they arrive in the JVM, followed by a list of partition indices so that Python can assemble the data in the correct order. This way, data is not buffered at the JVM and there is no waiting on particular partitions so performance will be increased. Followup to #21546 ## How was this patch tested? Added new test with a large number of batches per partition, and test that forces a small delay in the first partition. These test that partitions are collected out-of-order and then are are put in the correct order in Python. ## Performance Tests - toPandas Tests run on a 4 node standalone cluster with 32 cores total, 14.04.1-Ubuntu and OpenJDK 8 measured wall clock time to execute `toPandas()` and took the average best time of 5 runs/5 loops each. Test code ```python df = spark.range(1 << 25, numPartitions=32).toDF("id").withColumn("x1", rand()).withColumn("x2", rand()).withColumn("x3", rand()).withColumn("x4", rand()) for i in range(5): start = time.time() _ = df.toPandas() elapsed = time.time() - start ``` Spark config ``` spark.driver.memory 5g spark.executor.memory 5g spark.driver.maxResultSize 2g spark.sql.execution.arrow.enabled true ``` Current Master w/ Arrow stream | This PR ---------------------|------------ 5.16207 | 4.342533 5.133671 | 4.399408 5.147513 | 4.468471 5.105243 | 4.36524 5.018685 | 4.373791 Avg Master | Avg This PR ------------------|-------------- 5.1134364 | 4.3898886 Speedup of **1.164821449** Closes #22275 from BryanCutler/arrow-toPandas-oo-batches-SPARK-25274. 
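For illustration only (not part of this patch), a small self-contained sketch of the reordering idea described above; all names and values below are made up:

```python
# Sketch only, not Spark code. Batches arrive in arbitrary order, each tagged with a
# (partition index, batch index within that partition) tuple. The sender finally transmits
# the arrival indices sorted by that tuple, and the receiver restores the original order.
arrival_tags = [(2, 0), (0, 0), (0, 1), (1, 0)]   # hypothetical arrival order of the tags
batches = ["p2-b0", "p0-b0", "p0-b1", "p1-b0"]    # payloads, in the same arrival order

# Analogous to the JVM side: sort (tag, arrival index) pairs by tag, keep the arrival indices.
batch_order = [i for _, i in sorted(zip(arrival_tags, range(len(batches))))]

# Analogous to the Python side: re-order the received batches using the transmitted indices.
reordered = [batches[i] for i in batch_order]
assert reordered == ["p0-b0", "p0-b1", "p1-b0", "p2-b0"]
```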
Authored-by: Bryan Cutler Signed-off-by: Bryan Cutler --- python/pyspark/serializers.py | 33 ++++++++++++++ python/pyspark/sql/dataframe.py | 11 ++++- python/pyspark/sql/tests/test_arrow.py | 28 ++++++++++++ .../scala/org/apache/spark/sql/Dataset.scala | 45 ++++++++++--------- 4 files changed, 95 insertions(+), 22 deletions(-) diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index ff9a612b77f6..f3ebd3767a0a 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -185,6 +185,39 @@ def loads(self, obj): raise NotImplementedError +class ArrowCollectSerializer(Serializer): + """ + Deserialize a stream of batches followed by batch order information. Used in + DataFrame._collectAsArrow() after invoking Dataset.collectAsArrowToPython() in the JVM. + """ + + def __init__(self): + self.serializer = ArrowStreamSerializer() + + def dump_stream(self, iterator, stream): + return self.serializer.dump_stream(iterator, stream) + + def load_stream(self, stream): + """ + Load a stream of un-ordered Arrow RecordBatches, where the last iteration yields + a list of indices that can be used to put the RecordBatches in the correct order. + """ + # load the batches + for batch in self.serializer.load_stream(stream): + yield batch + + # load the batch order indices + num = read_int(stream) + batch_order = [] + for i in xrange(num): + index = read_int(stream) + batch_order.append(index) + yield batch_order + + def __repr__(self): + return "ArrowCollectSerializer(%s)" % self.serializer + + class ArrowStreamSerializer(Serializer): """ Serializes Arrow record batches as a stream. diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 1b1092c409be..a1056d0b787e 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -29,7 +29,7 @@ from pyspark import copy_func, since, _NoValue from pyspark.rdd import RDD, _load_from_socket, ignore_unicode_prefix -from pyspark.serializers import ArrowStreamSerializer, BatchedSerializer, PickleSerializer, \ +from pyspark.serializers import ArrowCollectSerializer, BatchedSerializer, PickleSerializer, \ UTF8Deserializer from pyspark.storagelevel import StorageLevel from pyspark.traceback_utils import SCCallSiteSync @@ -2168,7 +2168,14 @@ def _collectAsArrow(self): """ with SCCallSiteSync(self._sc) as css: sock_info = self._jdf.collectAsArrowToPython() - return list(_load_from_socket(sock_info, ArrowStreamSerializer())) + + # Collect list of un-ordered batches where last element is a list of correct order indices + results = list(_load_from_socket(sock_info, ArrowCollectSerializer())) + batches = results[:-1] + batch_order = results[-1] + + # Re-order the batch list using the correct order + return [batches[i] for i in batch_order] ########################################################################################## # Pandas compatibility diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index 6e75e82d5800..21fe5000df5d 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -381,6 +381,34 @@ def test_timestamp_dst(self): self.assertPandasEqual(pdf, df_from_python.toPandas()) self.assertPandasEqual(pdf, df_from_pandas.toPandas()) + def test_toPandas_batch_order(self): + + def delay_first_part(partition_index, iterator): + if partition_index == 0: + time.sleep(0.1) + return iterator + + # Collects Arrow RecordBatches out of order in driver JVM then re-orders in Python + def 
run_test(num_records, num_parts, max_records, use_delay=False): + df = self.spark.range(num_records, numPartitions=num_parts).toDF("a") + if use_delay: + df = df.rdd.mapPartitionsWithIndex(delay_first_part).toDF() + with self.sql_conf({"spark.sql.execution.arrow.maxRecordsPerBatch": max_records}): + pdf, pdf_arrow = self._toPandas_arrow_toggle(df) + self.assertPandasEqual(pdf, pdf_arrow) + + cases = [ + (1024, 512, 2), # Use large num partitions for more likely collecting out of order + (64, 8, 2, True), # Use delay in first partition to force collecting out of order + (64, 64, 1), # Test single batch per partition + (64, 1, 64), # Test single partition, single batch + (64, 1, 8), # Test single partition, multiple batches + (30, 7, 2), # Test different sized partitions + ] + + for case in cases: + run_test(*case) + class EncryptionArrowTests(ArrowTests): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index b10d66dfb1ae..a664c7338bad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -17,9 +17,10 @@ package org.apache.spark.sql -import java.io.CharArrayWriter +import java.io.{CharArrayWriter, DataOutputStream} import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer import scala.language.implicitConversions import scala.util.control.NonFatal @@ -3200,34 +3201,38 @@ class Dataset[T] private[sql]( val timeZoneId = sparkSession.sessionState.conf.sessionLocalTimeZone withAction("collectAsArrowToPython", queryExecution) { plan => - PythonRDD.serveToStream("serve-Arrow") { out => + PythonRDD.serveToStream("serve-Arrow") { outputStream => + val out = new DataOutputStream(outputStream) val batchWriter = new ArrowBatchStreamWriter(schema, out, timeZoneId) val arrowBatchRdd = toArrowBatchRdd(plan) val numPartitions = arrowBatchRdd.partitions.length - // Store collection results for worst case of 1 to N-1 partitions - val results = new Array[Array[Array[Byte]]](numPartitions - 1) - var lastIndex = -1 // index of last partition written + // Batches ordered by (index of partition, batch index in that partition) tuple + val batchOrder = new ArrayBuffer[(Int, Int)]() + var partitionCount = 0 - // Handler to eagerly write partitions to Python in order + // Handler to eagerly write batches to Python as they arrive, un-ordered def handlePartitionBatches(index: Int, arrowBatches: Array[Array[Byte]]): Unit = { - // If result is from next partition in order - if (index - 1 == lastIndex) { + if (arrowBatches.nonEmpty) { + // Write all batches (can be more than 1) in the partition, store the batch order tuple batchWriter.writeBatches(arrowBatches.iterator) - lastIndex += 1 - // Write stored partitions that come next in order - while (lastIndex < results.length && results(lastIndex) != null) { - batchWriter.writeBatches(results(lastIndex).iterator) - results(lastIndex) = null - lastIndex += 1 + arrowBatches.indices.foreach { + partition_batch_index => batchOrder.append((index, partition_batch_index)) } - // After last batch, end the stream - if (lastIndex == results.length) { - batchWriter.end() + } + partitionCount += 1 + + // After last batch, end the stream and write batch order indices + if (partitionCount == numPartitions) { + batchWriter.end() + out.writeInt(batchOrder.length) + // Sort by (index of partition, batch index in that partition) tuple to get the + // overall_batch_index from 0 to N-1 batches, 
which can be used to put the + // transferred batches in the correct order + batchOrder.zipWithIndex.sortBy(_._1).foreach { case (_, overall_batch_index) => + out.writeInt(overall_batch_index) } - } else { - // Store partitions received out of order - results(index - 1) = arrowBatches + out.flush() } } From b14a26ee5764aa98472bc69ab1dec408b89bc78a Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Thu, 6 Dec 2018 10:59:20 -0800 Subject: [PATCH 0079/1072] [SPARK-26236][SS] Add kafka delegation token support documentation. ## What changes were proposed in this pull request? Kafka delegation token support implemented in [PR#22598](https://github.com/apache/spark/pull/22598) but that didn't contain documentation because of rapid changes. Because it has been merged in this PR I've documented it. ## How was this patch tested? jekyll build + manual html check Closes #23195 from gaborgsomogyi/SPARK-26236. Authored-by: Gabor Somogyi Signed-off-by: Marcelo Vanzin --- .../structured-streaming-kafka-integration.md | 216 +++++++++++++++++- 1 file changed, 206 insertions(+), 10 deletions(-) diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index a549ce2a6a05..7040f8da2c61 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -66,8 +66,8 @@ Dataset df = spark .format("kafka") .option("kafka.bootstrap.servers", "host1:port1,host2:port2") .option("subscribe", "topic1") - .load() -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .load(); +df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)"); // Subscribe to multiple topics Dataset df = spark @@ -75,8 +75,8 @@ Dataset df = spark .format("kafka") .option("kafka.bootstrap.servers", "host1:port1,host2:port2") .option("subscribe", "topic1,topic2") - .load() -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .load(); +df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)"); // Subscribe to a pattern Dataset df = spark @@ -84,8 +84,8 @@ Dataset df = spark .format("kafka") .option("kafka.bootstrap.servers", "host1:port1,host2:port2") .option("subscribePattern", "topic.*") - .load() -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .load(); +df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)"); {% endhighlight %} @@ -479,7 +479,7 @@ StreamingQuery ds = df .format("kafka") .option("kafka.bootstrap.servers", "host1:port1,host2:port2") .option("topic", "topic1") - .start() + .start(); // Write key-value data from a DataFrame to Kafka using a topic specified in the data StreamingQuery ds = df @@ -487,7 +487,7 @@ StreamingQuery ds = df .writeStream() .format("kafka") .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .start() + .start(); {% endhighlight %} @@ -547,14 +547,14 @@ df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") .format("kafka") .option("kafka.bootstrap.servers", "host1:port1,host2:port2") .option("topic", "topic1") - .save() + .save(); // Write key-value data from a DataFrame to Kafka using a topic specified in the data df.selectExpr("topic", "CAST(key AS STRING)", "CAST(value AS STRING)") .write() .format("kafka") .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .save() + .save(); {% endhighlight %} @@ -624,3 +624,199 @@ For experimenting on `spark-shell`, you can also use `--packages` to add `spark- See [Application Submission Guide](submitting-applications.html) for more details about submitting applications with external 
dependencies.
+
+## Security
+
+Kafka 0.9.0.0 introduced several features that increase security in a cluster. For a detailed
+description of these features, see [Kafka security docs](http://kafka.apache.org/documentation.html#security).
+
+It's worth noting that security is optional and turned off by default.
+
+Spark supports the following ways to authenticate against a Kafka cluster:
+- **Delegation token (introduced in Kafka broker 1.1.0)**
+- **JAAS login configuration**
+
+### Delegation token
+
+With delegation tokens, the application can be configured via Spark parameters and may not need a JAAS login
+configuration (Spark can use Kafka's dynamic JAAS configuration feature). For further information
+about delegation tokens, see [Kafka delegation token docs](http://kafka.apache.org/documentation/#security_delegation_token).
+
+The process is initiated by Spark's Kafka delegation token provider. When `spark.kafka.bootstrap.servers` is set,
+Spark considers the following log-in options, in order of preference:
+- **JAAS login configuration**
+- **Keytab file**, such as,
+
+      ./bin/spark-submit \
+          --keytab <KEYTAB_FILE> \
+          --principal <PRINCIPAL> \
+          --conf spark.kafka.bootstrap.servers=<KAFKA_SERVERS> \
+          ...
+
+- **Kerberos credential cache**, such as,
+
+      ./bin/spark-submit \
+          --conf spark.kafka.bootstrap.servers=<KAFKA_SERVERS> \
+          ...
+
+The Kafka delegation token provider can be turned off by setting `spark.security.credentials.kafka.enabled` to `false` (default: `true`).
+
+Spark can be configured to use one of the following authentication protocols to obtain the token (it must match the
+Kafka broker configuration):
+- **SASL SSL (default)**
+- **SSL**
+- **SASL PLAINTEXT (for testing)**
+
+After obtaining the delegation token successfully, Spark distributes it across nodes and renews it as needed.
+Delegation tokens use the `SCRAM` login module for authentication, so the appropriate
+`sasl.mechanism` has to be configured on the source/sink (it must match the Kafka broker configuration):
+
+
+{% highlight scala %} + +// Setting on Kafka Source for Streaming Queries +val df = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("kafka.sasl.mechanism", "SCRAM-SHA-512") + .option("subscribe", "topic1") + .load() +df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] + +// Setting on Kafka Source for Batch Queries +val df = spark + .read + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("kafka.sasl.mechanism", "SCRAM-SHA-512") + .option("subscribe", "topic1") + .load() +df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] + +// Setting on Kafka Sink for Streaming Queries +val ds = df + .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .writeStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("kafka.sasl.mechanism", "SCRAM-SHA-512") + .option("topic", "topic1") + .start() + +// Setting on Kafka Sink for Batch Queries +val ds = df + .selectExpr("topic1", "CAST(key AS STRING)", "CAST(value AS STRING)") + .write + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("kafka.sasl.mechanism", "SCRAM-SHA-512") + .save() + +{% endhighlight %} +
+
+
+{% highlight java %}
+
+// Setting on Kafka Source for Streaming Queries
+Dataset<Row> df = spark
+  .readStream()
+  .format("kafka")
+  .option("kafka.bootstrap.servers", "host1:port1,host2:port2")
+  .option("kafka.sasl.mechanism", "SCRAM-SHA-512")
+  .option("subscribe", "topic1")
+  .load();
+df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)");
+
+// Setting on Kafka Source for Batch Queries
+Dataset<Row> df = spark
+  .read()
+  .format("kafka")
+  .option("kafka.bootstrap.servers", "host1:port1,host2:port2")
+  .option("kafka.sasl.mechanism", "SCRAM-SHA-512")
+  .option("subscribe", "topic1")
+  .load();
+df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)");
+
+// Setting on Kafka Sink for Streaming Queries
+StreamingQuery ds = df
+  .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
+  .writeStream()
+  .format("kafka")
+  .option("kafka.bootstrap.servers", "host1:port1,host2:port2")
+  .option("kafka.sasl.mechanism", "SCRAM-SHA-512")
+  .option("topic", "topic1")
+  .start();
+
+// Setting on Kafka Sink for Batch Queries
+df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
+  .write()
+  .format("kafka")
+  .option("kafka.bootstrap.servers", "host1:port1,host2:port2")
+  .option("kafka.sasl.mechanism", "SCRAM-SHA-512")
+  .option("topic", "topic1")
+  .save();
+
+{% endhighlight %}
+
+
+
+{% highlight python %}
+
+# Setting on Kafka Source for Streaming Queries
+df = spark \
+  .readStream \
+  .format("kafka") \
+  .option("kafka.bootstrap.servers", "host1:port1,host2:port2") \
+  .option("kafka.sasl.mechanism", "SCRAM-SHA-512") \
+  .option("subscribe", "topic1") \
+  .load()
+df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
+
+# Setting on Kafka Source for Batch Queries
+df = spark \
+  .read \
+  .format("kafka") \
+  .option("kafka.bootstrap.servers", "host1:port1,host2:port2") \
+  .option("kafka.sasl.mechanism", "SCRAM-SHA-512") \
+  .option("subscribe", "topic1") \
+  .load()
+df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
+
+# Setting on Kafka Sink for Streaming Queries
+ds = df \
+  .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") \
+  .writeStream \
+  .format("kafka") \
+  .option("kafka.bootstrap.servers", "host1:port1,host2:port2") \
+  .option("kafka.sasl.mechanism", "SCRAM-SHA-512") \
+  .option("topic", "topic1") \
+  .start()
+
+# Setting on Kafka Sink for Batch Queries
+df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") \
+  .write \
+  .format("kafka") \
+  .option("kafka.bootstrap.servers", "host1:port1,host2:port2") \
+  .option("kafka.sasl.mechanism", "SCRAM-SHA-512") \
+  .option("topic", "topic1") \
+  .save()
+
+{% endhighlight %}
+
+
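+
+A minimal illustrative sketch (the option values are placeholders matching the examples above) of
+collecting the shared bootstrap servers and `kafka.sasl.mechanism` settings once and reusing them,
+instead of repeating them on every reader and writer:
+
+{% highlight scala %}
+// Common Kafka options shared by the sources and sinks shown above.
+val kafkaParams = Map(
+  "kafka.bootstrap.servers" -> "host1:port1,host2:port2",
+  "kafka.sasl.mechanism" -> "SCRAM-SHA-512"
+)
+
+val df = spark
+  .readStream
+  .format("kafka")
+  .options(kafkaParams)
+  .option("subscribe", "topic1")
+  .load()
+{% endhighlight %}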
+
+
+When a delegation token is available on an executor, it can be overridden with a JAAS login configuration.
+
+### JAAS login configuration
+
+A JAAS login configuration must be placed on all nodes where Spark tries to access the Kafka cluster.
+This allows applying custom authentication logic, at a higher maintenance cost.
+This can be done in several ways. One possibility is to provide additional JVM parameters, such as,
+
+    ./bin/spark-submit \
+      --driver-java-options "-Djava.security.auth.login.config=/path/to/custom_jaas.conf" \
+      --conf spark.executor.extraJavaOptions=-Djava.security.auth.login.config=/path/to/custom_jaas.conf \
+      ...

From dbd90e54408d593e02a3dd1e659fcf9a7b940535 Mon Sep 17 00:00:00 2001
From: Marcelo Vanzin
Date: Thu, 6 Dec 2018 14:17:13 -0800
Subject: [PATCH 0080/1072] [SPARK-26194][K8S] Auto generate auth secret for k8s apps.

This change modifies the logic in the SecurityManager to do two things:

- generate unique app secrets also when k8s is being used
- only store the secret in the user's UGI on YARN

The latter is needed so that k8s won't unnecessarily create k8s secrets
for the UGI credentials when only the auth token is stored there.

On the k8s side, the secret is propagated to executors using an
environment variable instead. This ensures it works in both client and
cluster mode.

Security doc was updated to mention the feature and clarify that proper
access control in k8s should be enabled for it to be secure.

Author: Marcelo Vanzin

Closes #23174 from vanzin/SPARK-26194.
---
 .../org/apache/spark/SecurityManager.scala    | 21 +++-
 .../apache/spark/SecurityManagerSuite.scala   | 57 +++++++----
 docs/security.md                              | 34 ++++---
 .../features/BasicExecutorFeatureStep.scala   | 96 +++++++++++--------
 .../cluster/k8s/ExecutorPodsAllocator.scala   |  5 +-
 .../k8s/KubernetesClusterManager.scala        |  3 +-
 .../KubernetesClusterSchedulerBackend.scala   |  5 +-
 .../k8s/KubernetesExecutorBuilder.scala       | 13 ++-
 .../BasicExecutorFeatureStepSuite.scala       | 46 ++++++---
 .../k8s/ExecutorPodsAllocatorSuite.scala      |  9 +-
 ...bernetesClusterSchedulerBackendSuite.scala |  9 +-
 .../k8s/KubernetesExecutorBuilderSuite.scala  | 18 ++--
 .../k8s/integrationtest/KubernetesSuite.scala |  2 +
 13 files changed, 205 insertions(+), 113 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/SecurityManager.scala b/core/src/main/scala/org/apache/spark/SecurityManager.scala
index 3cfafeb95110..96e4b53b2418 100644
--- a/core/src/main/scala/org/apache/spark/SecurityManager.scala
+++ b/core/src/main/scala/org/apache/spark/SecurityManager.scala
@@ -348,15 +348,23 @@ private[spark] class SecurityManager(
    */
   def initializeAuth(): Unit = {
     import SparkMasterRegex._
+    val k8sRegex = "k8s.*".r

     if (!sparkConf.get(NETWORK_AUTH_ENABLED)) {
       return
     }

+    // TODO: this really should be abstracted somewhere else.
     val master = sparkConf.get(SparkLauncher.SPARK_MASTER, "")
-    master match {
+    val storeInUgi = master match {
       case "yarn" | "local" | LOCAL_N_REGEX(_) | LOCAL_N_FAILURES_REGEX(_, _) =>
-        // Secret generation allowed here
+        true
+
+      case k8sRegex() =>
+        // Don't propagate the secret through the user's credentials in kubernetes. That conflicts
+        // with the way k8s handles propagation of delegation tokens.
+ false + case _ => require(sparkConf.contains(SPARK_AUTH_SECRET_CONF), s"A secret key must be specified via the $SPARK_AUTH_SECRET_CONF config.") @@ -364,9 +372,12 @@ private[spark] class SecurityManager( } secretKey = Utils.createSecret(sparkConf) - val creds = new Credentials() - creds.addSecretKey(SECRET_LOOKUP_KEY, secretKey.getBytes(UTF_8)) - UserGroupInformation.getCurrentUser().addCredentials(creds) + + if (storeInUgi) { + val creds = new Credentials() + creds.addSecretKey(SECRET_LOOKUP_KEY, secretKey.getBytes(UTF_8)) + UserGroupInformation.getCurrentUser().addCredentials(creds) + } } // Default SecurityManager only has a single secret key, so ignore appId. diff --git a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala index e357299770a2..eec8004fc94f 100644 --- a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala @@ -395,15 +395,23 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties { assert(keyFromEnv === new SecurityManager(conf2).getSecretKey()) } - test("secret key generation") { - Seq( - ("yarn", true), - ("local", true), - ("local[*]", true), - ("local[1, 2]", true), - ("local-cluster[2, 1, 1024]", false), - ("invalid", false) - ).foreach { case (master, shouldGenerateSecret) => + // How is the secret expected to be generated and stored. + object SecretTestType extends Enumeration { + val MANUAL, AUTO, UGI = Value + } + + import SecretTestType._ + + Seq( + ("yarn", UGI), + ("local", UGI), + ("local[*]", UGI), + ("local[1, 2]", UGI), + ("k8s://127.0.0.1", AUTO), + ("local-cluster[2, 1, 1024]", MANUAL), + ("invalid", MANUAL) + ).foreach { case (master, secretType) => + test(s"secret key generation: master '$master'") { val conf = new SparkConf() .set(NETWORK_AUTH_ENABLED, true) .set(SparkLauncher.SPARK_MASTER, master) @@ -412,19 +420,26 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties { UserGroupInformation.createUserForTesting("authTest", Array()).doAs( new PrivilegedExceptionAction[Unit]() { override def run(): Unit = { - if (shouldGenerateSecret) { - mgr.initializeAuth() - val creds = UserGroupInformation.getCurrentUser().getCredentials() - val secret = creds.getSecretKey(SecurityManager.SECRET_LOOKUP_KEY) - assert(secret != null) - assert(new String(secret, UTF_8) === mgr.getSecretKey()) - } else { - intercept[IllegalArgumentException] { + secretType match { + case UGI => + mgr.initializeAuth() + val creds = UserGroupInformation.getCurrentUser().getCredentials() + val secret = creds.getSecretKey(SecurityManager.SECRET_LOOKUP_KEY) + assert(secret != null) + assert(new String(secret, UTF_8) === mgr.getSecretKey()) + + case AUTO => mgr.initializeAuth() - } - intercept[IllegalArgumentException] { - mgr.getSecretKey() - } + val creds = UserGroupInformation.getCurrentUser().getCredentials() + assert(creds.getSecretKey(SecurityManager.SECRET_LOOKUP_KEY) === null) + + case MANUAL => + intercept[IllegalArgumentException] { + mgr.initializeAuth() + } + intercept[IllegalArgumentException] { + mgr.getSecretKey() + } } } } diff --git a/docs/security.md b/docs/security.md index be4834660fb7..2a4f3c074c1e 100644 --- a/docs/security.md +++ b/docs/security.md @@ -26,21 +26,29 @@ not documented, Spark does not support. Spark currently supports authentication for RPC channels using a shared secret. 
Authentication can be turned on by setting the `spark.authenticate` configuration parameter. -The exact mechanism used to generate and distribute the shared secret is deployment-specific. +The exact mechanism used to generate and distribute the shared secret is deployment-specific. Unless +specified below, the secret must be defined by setting the `spark.authenticate.secret` config +option. The same secret is shared by all Spark applications and daemons in that case, which limits +the security of these deployments, especially on multi-tenant clusters. -For Spark on [YARN](running-on-yarn.html) and local deployments, Spark will automatically handle -generating and distributing the shared secret. Each application will use a unique shared secret. In +The REST Submission Server and the MesosClusterDispatcher do not support authentication. You should +ensure that all network access to the REST API & MesosClusterDispatcher (port 6066 and 7077 +respectively by default) are restricted to hosts that are trusted to submit jobs. + +### YARN + +For Spark on [YARN](running-on-yarn.html), Spark will automatically handle generating and +distributing the shared secret. Each application will use a unique shared secret. In the case of YARN, this feature relies on YARN RPC encryption being enabled for the distribution of secrets to be secure. -For other resource managers, `spark.authenticate.secret` must be configured on each of the nodes. -This secret will be shared by all the daemons and applications, so this deployment configuration is -not as secure as the above, especially when considering multi-tenant clusters. In this -configuration, a user with the secret can effectively impersonate any other user. +### Kubernetes -The Rest Submission Server and the MesosClusterDispatcher do not support authentication. You should -ensure that all network access to the REST API & MesosClusterDispatcher (port 6066 and 7077 -respectively by default) are restricted to hosts that are trusted to submit jobs. +On Kubernetes, Spark will also automatically generate an authentication secret unique to each +application. The secret is propagated to executor pods using environment variables. This means +that any user that can list pods in the namespace where the Spark application is running can +also see their authentication secret. Access control rules should be properly set up by the +Kubernetes admin to ensure that Spark authentication is secure. @@ -738,10 +746,10 @@ tokens for supported will be created. ## Secure Interaction with Kubernetes When talking to Hadoop-based services behind Kerberos, it was noted that Spark needs to obtain delegation tokens -so that non-local processes can authenticate. These delegation tokens in Kubernetes are stored in Secrets that are -shared by the Driver and its Executors. As such, there are three ways of submitting a Kerberos job: +so that non-local processes can authenticate. These delegation tokens in Kubernetes are stored in Secrets that are +shared by the Driver and its Executors. As such, there are three ways of submitting a Kerberos job: -In all cases you must define the environment variable: `HADOOP_CONF_DIR` or +In all cases you must define the environment variable: `HADOOP_CONF_DIR` or `spark.kubernetes.hadoop.configMapName.` It also important to note that the KDC needs to be visible from inside the containers. 
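To illustrate the Kubernetes behavior documented above (the automatically generated, per-application
authentication secret), a minimal sketch; the master URL and container image below are placeholders,
not taken from this patch:

```
import org.apache.spark.sql.SparkSession

// Sketch only: turn on RPC authentication for a k8s application. No
// spark.authenticate.secret is set; with this change Spark generates a
// per-app secret and hands it to executors through an environment variable.
val spark = SparkSession.builder()
  .master("k8s://https://127.0.0.1:6443")                      // placeholder API server
  .config("spark.authenticate", "true")
  .config("spark.kubernetes.container.image", "spark:example") // placeholder image
  .getOrCreate()
```
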
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 8bf315248388..939aa88b0797 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -20,7 +20,7 @@ import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model._ -import org.apache.spark.SparkException +import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ @@ -29,11 +29,12 @@ import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils -private[spark] class BasicExecutorFeatureStep(kubernetesConf: KubernetesExecutorConf) +private[spark] class BasicExecutorFeatureStep( + kubernetesConf: KubernetesExecutorConf, + secMgr: SecurityManager) extends KubernetesFeatureConfigStep { // Consider moving some of these fields to KubernetesConf or KubernetesExecutorSpecificConf - private val executorExtraClasspath = kubernetesConf.get(EXECUTOR_CLASS_PATH) private val executorContainerImage = kubernetesConf .get(EXECUTOR_CONTAINER_IMAGE) .getOrElse(throw new SparkException("Must specify the executor container image")) @@ -87,44 +88,61 @@ private[spark] class BasicExecutorFeatureStep(kubernetesConf: KubernetesExecutor val executorCpuQuantity = new QuantityBuilder(false) .withAmount(executorCoresRequest) .build() - val executorExtraClasspathEnv = executorExtraClasspath.map { cp => - new EnvVarBuilder() - .withName(ENV_CLASSPATH) - .withValue(cp) - .build() - } - val executorExtraJavaOptionsEnv = kubernetesConf - .get(EXECUTOR_JAVA_OPTIONS) - .map { opts => - val subsOpts = Utils.substituteAppNExecIds(opts, kubernetesConf.appId, - kubernetesConf.executorId) - val delimitedOpts = Utils.splitCommandString(subsOpts) - delimitedOpts.zipWithIndex.map { - case (opt, index) => - new EnvVarBuilder().withName(s"$ENV_JAVA_OPT_PREFIX$index").withValue(opt).build() + + val executorEnv: Seq[EnvVar] = { + (Seq( + (ENV_DRIVER_URL, driverUrl), + (ENV_EXECUTOR_CORES, executorCores.toString), + (ENV_EXECUTOR_MEMORY, executorMemoryString), + (ENV_APPLICATION_ID, kubernetesConf.appId), + // This is to set the SPARK_CONF_DIR to be /opt/spark/conf + (ENV_SPARK_CONF_DIR, SPARK_CONF_DIR_INTERNAL), + (ENV_EXECUTOR_ID, kubernetesConf.executorId) + ) ++ kubernetesConf.environment).map { case (k, v) => + new EnvVarBuilder() + .withName(k) + .withValue(v) + .build() } - }.getOrElse(Seq.empty[EnvVar]) - val executorEnv = (Seq( - (ENV_DRIVER_URL, driverUrl), - (ENV_EXECUTOR_CORES, executorCores.toString), - (ENV_EXECUTOR_MEMORY, executorMemoryString), - (ENV_APPLICATION_ID, kubernetesConf.appId), - // This is to set the SPARK_CONF_DIR to be /opt/spark/conf - (ENV_SPARK_CONF_DIR, SPARK_CONF_DIR_INTERNAL), - (ENV_EXECUTOR_ID, kubernetesConf.executorId)) ++ - kubernetesConf.environment) - .map(env => new EnvVarBuilder() - .withName(env._1) - .withValue(env._2) - .build() - ) ++ Seq( - new EnvVarBuilder() - .withName(ENV_EXECUTOR_POD_IP) - .withValueFrom(new EnvVarSourceBuilder() - .withNewFieldRef("v1", "status.podIP") + } ++ { + Seq(new 
EnvVarBuilder() + .withName(ENV_EXECUTOR_POD_IP) + .withValueFrom(new EnvVarSourceBuilder() + .withNewFieldRef("v1", "status.podIP") + .build()) .build()) - .build() - ) ++ executorExtraJavaOptionsEnv ++ executorExtraClasspathEnv.toSeq + } ++ { + Option(secMgr.getSecretKey()).map { authSecret => + new EnvVarBuilder() + .withName(SecurityManager.ENV_AUTH_SECRET) + .withValue(authSecret) + .build() + } + } ++ { + kubernetesConf.get(EXECUTOR_CLASS_PATH).map { cp => + new EnvVarBuilder() + .withName(ENV_CLASSPATH) + .withValue(cp) + .build() + } + } ++ { + val userOpts = kubernetesConf.get(EXECUTOR_JAVA_OPTIONS).toSeq.flatMap { opts => + val subsOpts = Utils.substituteAppNExecIds(opts, kubernetesConf.appId, + kubernetesConf.executorId) + Utils.splitCommandString(subsOpts) + } + + val sparkOpts = Utils.sparkJavaOpts(kubernetesConf.sparkConf, + SparkConf.isExecutorStartupConf) + + (userOpts ++ sparkOpts).zipWithIndex.map { case (opt, index) => + new EnvVarBuilder() + .withName(s"$ENV_JAVA_OPT_PREFIX$index") + .withValue(opt) + .build() + } + } + val requiredPorts = Seq( (BLOCK_MANAGER_PORT_NAME, blockManagerPort)) .map { case (name, port) => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index 2f0f949566d6..ac42554b1334 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -22,7 +22,7 @@ import io.fabric8.kubernetes.api.model.PodBuilder import io.fabric8.kubernetes.client.KubernetesClient import scala.collection.mutable -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.KubernetesConf @@ -31,6 +31,7 @@ import org.apache.spark.util.{Clock, Utils} private[spark] class ExecutorPodsAllocator( conf: SparkConf, + secMgr: SecurityManager, executorBuilder: KubernetesExecutorBuilder, kubernetesClient: KubernetesClient, snapshotsStore: ExecutorPodsSnapshotsStore, @@ -135,7 +136,7 @@ private[spark] class ExecutorPodsAllocator( newExecutorId.toString, applicationId, driverPod) - val executorPod = executorBuilder.buildFromFeatures(executorConf) + val executorPod = executorBuilder.buildFromFeatures(executorConf, secMgr) val podWithAttachedContainer = new PodBuilder(executorPod.pod) .editOrNewSpec() .addToContainers(executorPod.container) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index ce10f766334f..b31fbb420ed6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -94,6 +94,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit val executorPodsAllocator = new ExecutorPodsAllocator( sc.conf, + sc.env.securityManager, KubernetesExecutorBuilder(kubernetesClient, sc.conf), 
kubernetesClient, snapshotsStore, @@ -110,7 +111,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit new KubernetesClusterSchedulerBackend( scheduler.asInstanceOf[TaskSchedulerImpl], - sc.env.rpcEnv, + sc, kubernetesClient, requestExecutorsService, snapshotsStore, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index 6356b5864580..68f6f2e46e31 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -21,6 +21,7 @@ import java.util.concurrent.ExecutorService import io.fabric8.kubernetes.client.KubernetesClient import scala.concurrent.{ExecutionContext, Future} +import org.apache.spark.SparkContext import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.rpc.{RpcAddress, RpcEnv} @@ -30,7 +31,7 @@ import org.apache.spark.util.{ThreadUtils, Utils} private[spark] class KubernetesClusterSchedulerBackend( scheduler: TaskSchedulerImpl, - rpcEnv: RpcEnv, + sc: SparkContext, kubernetesClient: KubernetesClient, requestExecutorsService: ExecutorService, snapshotsStore: ExecutorPodsSnapshotsStore, @@ -38,7 +39,7 @@ private[spark] class KubernetesClusterSchedulerBackend( lifecycleEventHandler: ExecutorPodsLifecycleManager, watchEvents: ExecutorPodsWatchSnapshotSource, pollEvents: ExecutorPodsPollingSnapshotSource) - extends CoarseGrainedSchedulerBackend(scheduler, rpcEnv) { + extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( requestExecutorsService) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala index d24ff0d1e660..ba273cad6a8e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala @@ -20,14 +20,14 @@ import java.io.File import io.fabric8.kubernetes.client.KubernetesClient -import org.apache.spark.SparkConf +import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.features._ private[spark] class KubernetesExecutorBuilder( - provideBasicStep: (KubernetesExecutorConf => BasicExecutorFeatureStep) = - new BasicExecutorFeatureStep(_), + provideBasicStep: (KubernetesExecutorConf, SecurityManager) => BasicExecutorFeatureStep = + new BasicExecutorFeatureStep(_, _), provideSecretsStep: (KubernetesConf => MountSecretsFeatureStep) = new MountSecretsFeatureStep(_), provideEnvSecretsStep: (KubernetesConf => EnvSecretsFeatureStep) = @@ -44,13 +44,16 @@ private[spark] class KubernetesExecutorBuilder( new HadoopSparkUserExecutorFeatureStep(_), provideInitialPod: () => SparkPod = () => SparkPod.initialPod()) { - def buildFromFeatures(kubernetesConf: KubernetesExecutorConf): 
SparkPod = { + def buildFromFeatures( + kubernetesConf: KubernetesExecutorConf, + secMgr: SecurityManager): SparkPod = { val sparkConf = kubernetesConf.sparkConf val maybeHadoopConfigMap = sparkConf.getOption(HADOOP_CONFIG_MAP_NAME) val maybeDTSecretName = sparkConf.getOption(KERBEROS_DT_SECRET_NAME) val maybeDTDataItem = sparkConf.getOption(KERBEROS_DT_SECRET_KEY) - val baseFeatures = Seq(provideBasicStep(kubernetesConf), provideLocalDirsStep(kubernetesConf)) + val baseFeatures = Seq(provideBasicStep(kubernetesConf, secMgr), + provideLocalDirsStep(kubernetesConf)) val secretFeature = if (kubernetesConf.secretNamesToMountPaths.nonEmpty) { Seq(provideSecretsStep(kubernetesConf)) } else Nil diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index d6003c977937..6aa862643c78 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -21,13 +21,14 @@ import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model._ import org.scalatest.BeforeAndAfter -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.config._ import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +import org.apache.spark.util.Utils class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { @@ -63,7 +64,7 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { private var baseConf: SparkConf = _ before { - baseConf = new SparkConf() + baseConf = new SparkConf(false) .set(KUBERNETES_DRIVER_POD_NAME, DRIVER_POD_NAME) .set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, RESOURCE_NAME_PREFIX) .set(CONTAINER_IMAGE, EXECUTOR_IMAGE) @@ -84,7 +85,7 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { } test("basic executor pod has reasonable defaults") { - val step = new BasicExecutorFeatureStep(newExecutorConf()) + val step = new BasicExecutorFeatureStep(newExecutorConf(), new SecurityManager(baseConf)) val executor = step.configurePod(SparkPod.initialPod()) // The executor pod name and default labels. 
@@ -106,7 +107,7 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { assert(executor.pod.getSpec.getNodeSelector.isEmpty) assert(executor.pod.getSpec.getVolumes.isEmpty) - checkEnv(executor, Map()) + checkEnv(executor, baseConf, Map()) checkOwnerReferences(executor.pod, DRIVER_POD_UID) } @@ -114,7 +115,7 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { val longPodNamePrefix = "loremipsumdolorsitametvimatelitrefficiendisuscipianturvixlegeresple" baseConf.set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, longPodNamePrefix) - val step = new BasicExecutorFeatureStep(newExecutorConf()) + val step = new BasicExecutorFeatureStep(newExecutorConf(), new SecurityManager(baseConf)) assert(step.configurePod(SparkPod.initialPod()).pod.getSpec.getHostname.length === 63) } @@ -122,10 +123,10 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { baseConf.set(EXECUTOR_JAVA_OPTIONS, "foo=bar") baseConf.set(EXECUTOR_CLASS_PATH, "bar=baz") val kconf = newExecutorConf(environment = Map("qux" -> "quux")) - val step = new BasicExecutorFeatureStep(kconf) + val step = new BasicExecutorFeatureStep(kconf, new SecurityManager(baseConf)) val executor = step.configurePod(SparkPod.initialPod()) - checkEnv(executor, + checkEnv(executor, baseConf, Map("SPARK_JAVA_OPT_0" -> "foo=bar", ENV_CLASSPATH -> "bar=baz", "qux" -> "quux")) @@ -136,12 +137,27 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { baseConf.set("spark.kubernetes.resource.type", "python") baseConf.set(PYSPARK_EXECUTOR_MEMORY, 42L) - val step = new BasicExecutorFeatureStep(newExecutorConf()) + val step = new BasicExecutorFeatureStep(newExecutorConf(), new SecurityManager(baseConf)) val executor = step.configurePod(SparkPod.initialPod()) // This is checking that basic executor + executorMemory = 1408 + 42 = 1450 assert(executor.container.getResources.getRequests.get("memory").getAmount === "1450Mi") } + test("auth secret propagation") { + val conf = baseConf.clone() + .set(NETWORK_AUTH_ENABLED, true) + .set("spark.master", "k8s://127.0.0.1") + + val secMgr = new SecurityManager(conf) + secMgr.initializeAuth() + + val step = new BasicExecutorFeatureStep(KubernetesTestConf.createExecutorConf(sparkConf = conf), + secMgr) + + val executor = step.configurePod(SparkPod.initialPod()) + checkEnv(executor, conf, Map(SecurityManager.ENV_AUTH_SECRET -> secMgr.getSecretKey())) + } + // There is always exactly one controller reference, and it points to the driver pod. private def checkOwnerReferences(executor: Pod, driverPodUid: String): Unit = { assert(executor.getMetadata.getOwnerReferences.size() === 1) @@ -150,7 +166,10 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { } // Check that the expected environment variables are present. 
- private def checkEnv(executorPod: SparkPod, additionalEnvVars: Map[String, String]): Unit = { + private def checkEnv( + executorPod: SparkPod, + conf: SparkConf, + additionalEnvVars: Map[String, String]): Unit = { val defaultEnvs = Map( ENV_EXECUTOR_ID -> "1", ENV_DRIVER_URL -> DRIVER_ADDRESS.toString, @@ -160,10 +179,15 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { ENV_SPARK_CONF_DIR -> SPARK_CONF_DIR_INTERNAL, ENV_EXECUTOR_POD_IP -> null) ++ additionalEnvVars - assert(executorPod.container.getEnv.size() === defaultEnvs.size) + val extraJavaOptsStart = additionalEnvVars.keys.count(_.startsWith(ENV_JAVA_OPT_PREFIX)) + val extraJavaOpts = Utils.sparkJavaOpts(conf, SparkConf.isExecutorStartupConf) + val extraJavaOptsEnvs = extraJavaOpts.zipWithIndex.map { case (opt, ind) => + s"$ENV_JAVA_OPT_PREFIX${ind + extraJavaOptsStart}" -> opt + }.toMap + val mapEnvs = executorPod.container.getEnv.asScala.map { x => (x.getName, x.getValue) }.toMap - assert(defaultEnvs === mapEnvs) + assert((defaultEnvs ++ extraJavaOptsEnvs) === mapEnvs) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index 303e24b8f497..d4fa31af3d5c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -20,13 +20,13 @@ import io.fabric8.kubernetes.api.model.{DoneablePod, Pod, PodBuilder} import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.PodResource import org.mockito.{ArgumentMatcher, Matchers, Mock, MockitoAnnotations} -import org.mockito.Matchers.any +import org.mockito.Matchers.{any, eq => meq} import org.mockito.Mockito.{never, times, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ @@ -52,6 +52,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { private val podAllocationSize = conf.get(KUBERNETES_ALLOCATION_BATCH_SIZE) private val podAllocationDelay = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) private val podCreationTimeout = math.max(podAllocationDelay * 5, 60000L) + private val secMgr = new SecurityManager(conf) private var waitForExecutorPodsClock: ManualClock = _ @@ -79,12 +80,12 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { when(kubernetesClient.pods()).thenReturn(podOperations) when(podOperations.withName(driverPodName)).thenReturn(driverPodOperations) when(driverPodOperations.get).thenReturn(driverPod) - when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]))) + when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]), meq(secMgr))) .thenAnswer(executorPodAnswer()) snapshotsStore = new DeterministicExecutorPodsSnapshotsStore() waitForExecutorPodsClock = new ManualClock(0L) podsAllocatorUnderTest = new ExecutorPodsAllocator( - conf, executorBuilder, 
kubernetesClient, snapshotsStore, waitForExecutorPodsClock) + conf, secMgr, executorBuilder, kubernetesClient, snapshotsStore, waitForExecutorPodsClock) podsAllocatorUnderTest.start(TEST_SPARK_APP_ID) } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala index 52e7a12dbaf0..75232f7b98b0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala @@ -23,7 +23,7 @@ import org.mockito.Matchers.{eq => mockitoEq} import org.mockito.Mockito.{never, verify, when} import org.scalatest.BeforeAndAfter -import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} +import org.apache.spark.{SparkConf, SparkContext, SparkEnv, SparkFunSuite} import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.Fabric8Aliases._ import org.apache.spark.rpc.{RpcEndpoint, RpcEndpointRef, RpcEnv} @@ -41,6 +41,9 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn @Mock private var sc: SparkContext = _ + @Mock + private var env: SparkEnv = _ + @Mock private var rpcEnv: RpcEnv = _ @@ -81,6 +84,8 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn MockitoAnnotations.initMocks(this) when(taskScheduler.sc).thenReturn(sc) when(sc.conf).thenReturn(sparkConf) + when(sc.env).thenReturn(env) + when(env.rpcEnv).thenReturn(rpcEnv) driverEndpoint = ArgumentCaptor.forClass(classOf[RpcEndpoint]) when(rpcEnv.setupEndpoint( mockitoEq(CoarseGrainedSchedulerBackend.ENDPOINT_NAME), driverEndpoint.capture())) @@ -88,7 +93,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn when(kubernetesClient.pods()).thenReturn(podOperations) schedulerBackendUnderTest = new KubernetesClusterSchedulerBackend( taskScheduler, - rpcEnv, + sc, kubernetesClient, requestExecutorsService, eventQueue, diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala index b6a75b15af85..ef521fd801e9 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala @@ -22,7 +22,7 @@ import io.fabric8.kubernetes.api.model.{Config => _, _} import io.fabric8.kubernetes.client.KubernetesClient import org.mockito.Mockito.{mock, never, verify} -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.features._ @@ -39,6 +39,8 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { private val KERBEROS_CONF_STEP_TYPE = "kerberos-step" private val MOUNT_VOLUMES_STEP_TYPE = "mount-volumes" + private val secMgr = new SecurityManager(new SparkConf(false)) + private val basicFeatureStep = 
KubernetesFeaturesTestUtils.getMockConfigStepForStepType( BASIC_STEP_TYPE, classOf[BasicExecutorFeatureStep]) private val mountSecretsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( @@ -57,7 +59,7 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { MOUNT_VOLUMES_STEP_TYPE, classOf[MountVolumesFeatureStep]) private val builderUnderTest = new KubernetesExecutorBuilder( - _ => basicFeatureStep, + (_, _) => basicFeatureStep, _ => mountSecretsStep, _ => envSecretsStep, _ => localDirsStep, @@ -69,7 +71,7 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { test("Basic steps are consistently applied.") { val conf = KubernetesTestConf.createExecutorConf() validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), BASIC_STEP_TYPE, LOCAL_DIRS_STEP_TYPE) + builderUnderTest.buildFromFeatures(conf, secMgr), BASIC_STEP_TYPE, LOCAL_DIRS_STEP_TYPE) } test("Apply secrets step if secrets are present.") { @@ -77,7 +79,7 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { secretEnvNamesToKeyRefs = Map("secret-name" -> "secret-key"), secretNamesToMountPaths = Map("secret" -> "secretMountPath")) validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), + builderUnderTest.buildFromFeatures(conf, secMgr), BASIC_STEP_TYPE, LOCAL_DIRS_STEP_TYPE, SECRETS_STEP_TYPE, @@ -94,7 +96,7 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { val conf = KubernetesTestConf.createExecutorConf( volumes = Seq(volumeSpec)) validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), + builderUnderTest.buildFromFeatures(conf, secMgr), BASIC_STEP_TYPE, LOCAL_DIRS_STEP_TYPE, MOUNT_VOLUMES_STEP_TYPE) @@ -107,7 +109,7 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { .set(HADOOP_CONFIG_MAP_NAME, "hadoop-conf-map-name") .set(KRB5_CONFIG_MAP_NAME, "krb5-conf-map-name")) validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), + builderUnderTest.buildFromFeatures(conf, secMgr), BASIC_STEP_TYPE, LOCAL_DIRS_STEP_TYPE, HADOOP_CONF_STEP_TYPE, @@ -123,7 +125,7 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { .set(KERBEROS_DT_SECRET_NAME, "dt-secret") .set(KERBEROS_DT_SECRET_KEY, "dt-key" )) validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), + builderUnderTest.buildFromFeatures(conf, secMgr), BASIC_STEP_TYPE, LOCAL_DIRS_STEP_TYPE, HADOOP_CONF_STEP_TYPE, @@ -154,7 +156,7 @@ class KubernetesExecutorBuilderSuite extends SparkFunSuite { .endMetadata() .build())) val sparkPod = KubernetesExecutorBuilder(kubernetesClient, sparkConf) - .buildFromFeatures(kubernetesConf) + .buildFromFeatures(kubernetesConf, secMgr) PodBuilderSuiteUtils.verifyPodWithSupportedFeatures(sparkPod) } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index b746a01eb529..f8f4b4177f3b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -36,6 +36,7 @@ import org.apache.spark.{SPARK_VERSION, SparkFunSuite} import org.apache.spark.deploy.k8s.integrationtest.TestConstants._ import org.apache.spark.deploy.k8s.integrationtest.backend.{IntegrationTestBackend, IntegrationTestBackendFactory} 
import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ class KubernetesSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfter with BasicTestsSuite with SecretsTestsSuite @@ -138,6 +139,7 @@ class KubernetesSuite extends SparkFunSuite .set("spark.kubernetes.driver.pod.name", driverPodName) .set("spark.kubernetes.driver.label.spark-app-locator", appLocator) .set("spark.kubernetes.executor.label.spark-app-locator", appLocator) + .set(NETWORK_AUTH_ENABLED.key, "true") if (!kubernetesTestComponents.hasUserSpecifiedNamespace) { kubernetesTestComponents.createNamespace() } From bfc5569a53510bc75c15384084ff89b418592875 Mon Sep 17 00:00:00 2001 From: caoxuewen Date: Fri, 7 Dec 2018 09:57:35 +0800 Subject: [PATCH 0081/1072] [SPARK-26289][CORE] cleanup enablePerfMetrics parameter from BytesToBytesMap ## What changes were proposed in this pull request? `enablePerfMetrics `was originally designed in `BytesToBytesMap `to control `getNumHashCollisions getTimeSpentResizingNs getAverageProbesPerLookup`. However, as the Spark version gradual progress. this parameter is only used for `getAverageProbesPerLookup ` and always given to true when using `BytesToBytesMap`. it is also dangerous to determine whether `getAverageProbesPerLookup `opens and throws an `IllegalStateException `exception. So this pr will be remove `enablePerfMetrics `parameter from `BytesToBytesMap`. thanks. ## How was this patch tested? the existed test cases. Closes #23244 from heary-cao/enablePerfMetrics. Authored-by: caoxuewen Signed-off-by: Wenchen Fan --- .../spark/unsafe/map/BytesToBytesMap.java | 33 ++++--------------- .../map/AbstractBytesToBytesMapSuite.java | 4 +-- .../UnsafeFixedWidthAggregationMap.java | 2 +- .../sql/execution/joins/HashedRelation.scala | 6 ++-- 4 files changed, 12 insertions(+), 33 deletions(-) diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index a4e88598f760..405e52946415 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -159,11 +159,9 @@ public final class BytesToBytesMap extends MemoryConsumer { */ private final Location loc; - private final boolean enablePerfMetrics; + private long numProbes = 0L; - private long numProbes = 0; - - private long numKeyLookups = 0; + private long numKeyLookups = 0L; private long peakMemoryUsedBytes = 0L; @@ -180,8 +178,7 @@ public BytesToBytesMap( SerializerManager serializerManager, int initialCapacity, double loadFactor, - long pageSizeBytes, - boolean enablePerfMetrics) { + long pageSizeBytes) { super(taskMemoryManager, pageSizeBytes, taskMemoryManager.getTungstenMemoryMode()); this.taskMemoryManager = taskMemoryManager; this.blockManager = blockManager; @@ -189,7 +186,6 @@ public BytesToBytesMap( this.loadFactor = loadFactor; this.loc = new Location(); this.pageSizeBytes = pageSizeBytes; - this.enablePerfMetrics = enablePerfMetrics; if (initialCapacity <= 0) { throw new IllegalArgumentException("Initial capacity must be greater than 0"); } @@ -209,14 +205,6 @@ public BytesToBytesMap( TaskMemoryManager taskMemoryManager, int initialCapacity, long pageSizeBytes) { - this(taskMemoryManager, initialCapacity, pageSizeBytes, false); - } - - public BytesToBytesMap( - TaskMemoryManager taskMemoryManager, - int initialCapacity, - long pageSizeBytes, - boolean enablePerfMetrics) { this( taskMemoryManager, SparkEnv.get() != null ? 
SparkEnv.get().blockManager() : null, @@ -224,8 +212,7 @@ public BytesToBytesMap( initialCapacity, // In order to re-use the longArray for sorting, the load factor cannot be larger than 0.5. 0.5, - pageSizeBytes, - enablePerfMetrics); + pageSizeBytes); } /** @@ -462,15 +449,12 @@ public Location lookup(Object keyBase, long keyOffset, int keyLength, int hash) public void safeLookup(Object keyBase, long keyOffset, int keyLength, Location loc, int hash) { assert(longArray != null); - if (enablePerfMetrics) { - numKeyLookups++; - } + numKeyLookups++; + int pos = hash & mask; int step = 1; while (true) { - if (enablePerfMetrics) { - numProbes++; - } + numProbes++; if (longArray.get(pos * 2) == 0) { // This is a new key. loc.with(pos, hash, false); @@ -860,9 +844,6 @@ public long getPeakMemoryUsedBytes() { * Returns the average number of probes per key lookup. */ public double getAverageProbesPerLookup() { - if (!enablePerfMetrics) { - throw new IllegalStateException(); - } return (1.0 * numProbes) / numKeyLookups; } diff --git a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java index 53a233f698c7..aa29232e73e1 100644 --- a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java +++ b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java @@ -530,7 +530,7 @@ public void failureToGrow() { @Test public void spillInIterator() throws IOException { BytesToBytesMap map = new BytesToBytesMap( - taskMemoryManager, blockManager, serializerManager, 1, 0.75, 1024, false); + taskMemoryManager, blockManager, serializerManager, 1, 0.75, 1024); try { int i; for (i = 0; i < 1024; i++) { @@ -569,7 +569,7 @@ public void spillInIterator() throws IOException { @Test public void multipleValuesForSameKey() { BytesToBytesMap map = - new BytesToBytesMap(taskMemoryManager, blockManager, serializerManager, 1, 0.5, 1024, false); + new BytesToBytesMap(taskMemoryManager, blockManager, serializerManager, 1, 0.5, 1024); try { int i; for (i = 0; i < 1024; i++) { diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java index c8cf44b51df7..7e76a651ba2c 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java @@ -98,7 +98,7 @@ public UnsafeFixedWidthAggregationMap( this.groupingKeyProjection = UnsafeProjection.create(groupingKeySchema); this.groupingKeySchema = groupingKeySchema; this.map = new BytesToBytesMap( - taskContext.taskMemoryManager(), initialCapacity, pageSizeBytes, true); + taskContext.taskMemoryManager(), initialCapacity, pageSizeBytes); // Initialize the buffer for aggregation value final UnsafeProjection valueProjection = UnsafeProjection.create(aggregationBufferSchema); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index e8c01d46a84c..b1ff6e83acc2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -248,8 +248,7 @@ private[joins] class UnsafeHashedRelation( binaryMap = new BytesToBytesMap( taskMemoryManager, (nKeys 
* 1.5 + 1).toInt, // reduce hash collision - pageSizeBytes, - true) + pageSizeBytes) var i = 0 var keyBuffer = new Array[Byte](1024) @@ -299,8 +298,7 @@ private[joins] object UnsafeHashedRelation { taskMemoryManager, // Only 70% of the slots can be used before growing, more capacity help to reduce collision (sizeEstimate * 1.5 + 1).toInt, - pageSizeBytes, - true) + pageSizeBytes) // Create a mapping of buildKeys -> rows val keyGenerator = UnsafeProjection.create(key) From 5a140b7844936cf2b65f08853b8cfd8c499d4f13 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 7 Dec 2018 11:13:14 +0800 Subject: [PATCH 0082/1072] [SPARK-26263][SQL] Validate partition values with user provided schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? Currently if user provides data schema, partition column values are converted as per it. But if the conversion failed, e.g. converting string to int, the column value is null. This PR proposes to throw exception in such case, instead of converting into null value silently: 1. These null partition column values doesn't make sense to users in most cases. It is better to show the conversion failure, and then users can adjust the schema or ETL jobs to fix it. 2. There are always exceptions on such conversion failure for non-partition data columns. Partition columns should have the same behavior. We can reproduce the case above as following: ``` /tmp/testDir ├── p=bar └── p=foo ``` If we run: ``` val schema = StructType(Seq(StructField("p", IntegerType, false))) spark.read.schema(schema).csv("/tmp/testDir/").show() ``` We will get: ``` +----+ | p| +----+ |null| |null| +----+ ``` ## How was this patch tested? Unit test Closes #23215 from gengliangwang/SPARK-26263. Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- docs/sql-migration-guide-upgrade.md | 2 ++ .../apache/spark/sql/internal/SQLConf.scala | 12 ++++++++ .../PartitioningAwareFileIndex.scala | 4 +-- .../datasources/PartitioningUtils.scala | 30 +++++++++++++------ .../datasources/FileIndexSuite.scala | 27 ++++++++++++++++- .../ParquetPartitionDiscoverySuite.scala | 18 ++++++++--- 6 files changed, 77 insertions(+), 16 deletions(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index ed2ff139bcc3..3638b0873aa4 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -29,6 +29,8 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, users can create a map with duplicated keys via built-in functions like `CreateMap`, `StringToMap`, etc. The behavior of map with duplicated keys is undefined, e.g. map look up respects the duplicated key appears first, `Dataset.collect` only keeps the duplicated key appears last, `MapKeys` returns duplicated keys, etc. Since Spark 3.0, these built-in functions will remove duplicated map keys with last wins policy. Users may still read map values with duplicated keys from data sources which do not enforce it (e.g. Parquet), the behavior will be udefined. + - In Spark version 2.4 and earlier, partition column value is converted as null if it can't be casted to corresponding user provided schema. Since 3.0, partition column value is validated with user provided schema. An exception is thrown if the validation fails. You can disable such validation by setting `spark.sql.sources.validatePartitionColumns` to `false`. 
+ - In Spark version 2.4 and earlier, the `SET` command works without any warnings even if the specified key is for `SparkConf` entries and it has no effect because the command does not update `SparkConf`, but the behavior might confuse users. Since 3.0, the command fails if a `SparkConf` key is used. You can disable such a check by setting `spark.sql.legacy.execution.setCommandRejectsSparkConfs` to `false`. - Spark applications which are built with Spark version 2.4 and prior, and call methods of `UserDefinedFunction`, need to be re-compiled with Spark 3.0, as they are not binary compatible with Spark 3.0. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 451b051f8407..6857b8de7975 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1396,6 +1396,16 @@ object SQLConf { .booleanConf .createWithDefault(false) + val VALIDATE_PARTITION_COLUMNS = + buildConf("spark.sql.sources.validatePartitionColumns") + .internal() + .doc("When this option is set to true, partition column values will be validated with " + + "user-specified schema. If the validation fails, a runtime exception is thrown." + + "When this option is set to false, the partition column value will be converted to null " + + "if it can not be casted to corresponding user-specified schema.") + .booleanConf + .createWithDefault(true) + val CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE = buildConf("spark.sql.streaming.continuous.executorQueueSize") .internal() @@ -2014,6 +2024,8 @@ class SQLConf extends Serializable with Logging { def allowCreatingManagedTableUsingNonemptyLocation: Boolean = getConf(ALLOW_CREATING_MANAGED_TABLE_USING_NONEMPTY_LOCATION) + def validatePartitionColumns: Boolean = getConf(VALIDATE_PARTITION_COLUMNS) + def partitionOverwriteMode: PartitionOverwriteMode.Value = PartitionOverwriteMode.withName(getConf(PARTITION_OVERWRITE_MODE)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index 7b0e4dbcc25f..b2e4155e6f49 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -127,13 +127,13 @@ abstract class PartitioningAwareFileIndex( val timeZoneId = caseInsensitiveOptions.get(DateTimeUtils.TIMEZONE_OPTION) .getOrElse(sparkSession.sessionState.conf.sessionLocalTimeZone) - val caseSensitive = sparkSession.sqlContext.conf.caseSensitiveAnalysis PartitioningUtils.parsePartitions( leafDirs, typeInference = sparkSession.sessionState.conf.partitionColumnTypeInferenceEnabled, basePaths = basePaths, userSpecifiedSchema = userSpecifiedSchema, - caseSensitive = caseSensitive, + caseSensitive = sparkSession.sqlContext.conf.caseSensitiveAnalysis, + validatePartitionColumns = sparkSession.sqlContext.conf.validatePartitionColumns, timeZoneId = timeZoneId) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index d66cb09bda0c..6458b65466fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -26,12 +26,13 @@ import scala.util.Try import org.apache.hadoop.fs.Path -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCoercion} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -96,9 +97,10 @@ object PartitioningUtils { basePaths: Set[Path], userSpecifiedSchema: Option[StructType], caseSensitive: Boolean, + validatePartitionColumns: Boolean, timeZoneId: String): PartitionSpec = { - parsePartitions(paths, typeInference, basePaths, userSpecifiedSchema, - caseSensitive, DateTimeUtils.getTimeZone(timeZoneId)) + parsePartitions(paths, typeInference, basePaths, userSpecifiedSchema, caseSensitive, + validatePartitionColumns, DateTimeUtils.getTimeZone(timeZoneId)) } private[datasources] def parsePartitions( @@ -107,6 +109,7 @@ object PartitioningUtils { basePaths: Set[Path], userSpecifiedSchema: Option[StructType], caseSensitive: Boolean, + validatePartitionColumns: Boolean, timeZone: TimeZone): PartitionSpec = { val userSpecifiedDataTypes = if (userSpecifiedSchema.isDefined) { val nameToDataType = userSpecifiedSchema.get.fields.map(f => f.name -> f.dataType).toMap @@ -121,7 +124,8 @@ object PartitioningUtils { // First, we need to parse every partition's path and see if we can find partition values. val (partitionValues, optDiscoveredBasePaths) = paths.map { path => - parsePartition(path, typeInference, basePaths, userSpecifiedDataTypes, timeZone) + parsePartition(path, typeInference, basePaths, userSpecifiedDataTypes, + validatePartitionColumns, timeZone) }.unzip // We create pairs of (path -> path's partition value) here @@ -203,6 +207,7 @@ object PartitioningUtils { typeInference: Boolean, basePaths: Set[Path], userSpecifiedDataTypes: Map[String, DataType], + validatePartitionColumns: Boolean, timeZone: TimeZone): (Option[PartitionValues], Option[Path]) = { val columns = ArrayBuffer.empty[(String, Literal)] // Old Hadoop versions don't have `Path.isRoot` @@ -224,7 +229,8 @@ object PartitioningUtils { // Let's say currentPath is a path of "/table/a=1/", currentPath.getName will give us a=1. // Once we get the string, we try to parse it and find the partition column and value. val maybeColumn = - parsePartitionColumn(currentPath.getName, typeInference, userSpecifiedDataTypes, timeZone) + parsePartitionColumn(currentPath.getName, typeInference, userSpecifiedDataTypes, + validatePartitionColumns, timeZone) maybeColumn.foreach(columns += _) // Now, we determine if we should stop. 
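To illustrate what the new `validatePartitionColumns` flag changes for reads that supply a schema, here is a minimal sketch; the path, column names, and directory layout are hypothetical:

```
// Hypothetical layout: /data/events/a=foo/part-00000.parquet, read with schema "a INT, v STRING".

// Default (true): partition discovery fails fast with a RuntimeException like
//   "Failed to cast value `foo` to `IntegerType` for partition column `a`".
spark.conf.set("spark.sql.sources.validatePartitionColumns", "true")

// Legacy behaviour: a partition value that cannot be cast silently becomes null.
spark.conf.set("spark.sql.sources.validatePartitionColumns", "false")
val df = spark.read.schema("a INT, v STRING").parquet("/data/events")
```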
@@ -258,6 +264,7 @@ object PartitioningUtils { columnSpec: String, typeInference: Boolean, userSpecifiedDataTypes: Map[String, DataType], + validatePartitionColumns: Boolean, timeZone: TimeZone): Option[(String, Literal)] = { val equalSignIndex = columnSpec.indexOf('=') if (equalSignIndex == -1) { @@ -272,10 +279,15 @@ object PartitioningUtils { val literal = if (userSpecifiedDataTypes.contains(columnName)) { // SPARK-26188: if user provides corresponding column schema, get the column value without // inference, and then cast it as user specified data type. - val columnValue = inferPartitionColumnValue(rawColumnValue, false, timeZone) - val castedValue = - Cast(columnValue, userSpecifiedDataTypes(columnName), Option(timeZone.getID)).eval() - Literal.create(castedValue, userSpecifiedDataTypes(columnName)) + val dataType = userSpecifiedDataTypes(columnName) + val columnValueLiteral = inferPartitionColumnValue(rawColumnValue, false, timeZone) + val columnValue = columnValueLiteral.eval() + val castedValue = Cast(columnValueLiteral, dataType, Option(timeZone.getID)).eval() + if (validatePartitionColumns && columnValue != null && castedValue == null) { + throw new RuntimeException(s"Failed to cast value `$columnValue` to `$dataType` " + + s"for partition column `$columnName`") + } + Literal.create(castedValue, dataType) } else { inferPartitionColumnValue(rawColumnValue, typeInference, timeZone) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala index ec552f7ddf47..6bd0a2591fc1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.types.{StringType, StructField, StructType} +import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} import org.apache.spark.util.{KnownSizeEstimation, SizeEstimator} class FileIndexSuite extends SharedSQLContext { @@ -95,6 +95,31 @@ class FileIndexSuite extends SharedSQLContext { } } + test("SPARK-26263: Throw exception when partition value can't be casted to user-specified type") { + withTempDir { dir => + val partitionDirectory = new File(dir, "a=foo") + partitionDirectory.mkdir() + val file = new File(partitionDirectory, "text.txt") + stringToFile(file, "text") + val path = new Path(dir.getCanonicalPath) + val schema = StructType(Seq(StructField("a", IntegerType, false))) + withSQLConf(SQLConf.VALIDATE_PARTITION_COLUMNS.key -> "true") { + val fileIndex = new InMemoryFileIndex(spark, Seq(path), Map.empty, Some(schema)) + val msg = intercept[RuntimeException] { + fileIndex.partitionSpec() + }.getMessage + assert(msg == "Failed to cast value `foo` to `IntegerType` for partition column `a`") + } + + withSQLConf(SQLConf.VALIDATE_PARTITION_COLUMNS.key -> "false") { + val fileIndex = new InMemoryFileIndex(spark, Seq(path), Map.empty, Some(schema)) + val partitionValues = fileIndex.partitionSpec().partitions.map(_.values) + assert(partitionValues.length == 1 && partitionValues(0).numFields == 1 && + partitionValues(0).isNullAt(0)) + } + } + } + test("InMemoryFileIndex: input paths are converted to qualified paths") { withTempDir { dir => val 
file = new File(dir, "text.txt") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index f808ca458aaa..88067358667c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -101,7 +101,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha "hdfs://host:9000/path/a=10.5/b=hello") var exception = intercept[AssertionError] { - parsePartitions(paths.map(new Path(_)), true, Set.empty[Path], None, true, timeZoneId) + parsePartitions(paths.map(new Path(_)), true, Set.empty[Path], None, true, true, timeZoneId) } assert(exception.getMessage().contains("Conflicting directory structures detected")) @@ -117,6 +117,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha Set(new Path("hdfs://host:9000/path/")), None, true, + true, timeZoneId) // Valid @@ -132,6 +133,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha Set(new Path("hdfs://host:9000/path/something=true/table")), None, true, + true, timeZoneId) // Valid @@ -147,6 +149,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha Set(new Path("hdfs://host:9000/path/table=true")), None, true, + true, timeZoneId) // Invalid @@ -162,6 +165,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha Set(new Path("hdfs://host:9000/path/")), None, true, + true, timeZoneId) } assert(exception.getMessage().contains("Conflicting directory structures detected")) @@ -184,6 +188,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha Set(new Path("hdfs://host:9000/tmp/tables/")), None, true, + true, timeZoneId) } assert(exception.getMessage().contains("Conflicting directory structures detected")) @@ -191,13 +196,14 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha test("parse partition") { def check(path: String, expected: Option[PartitionValues]): Unit = { - val actual = parsePartition(new Path(path), true, Set.empty[Path], Map.empty, timeZone)._1 + val actual = parsePartition(new Path(path), true, Set.empty[Path], + Map.empty, true, timeZone)._1 assert(expected === actual) } def checkThrows[T <: Throwable: Manifest](path: String, expected: String): Unit = { val message = intercept[T] { - parsePartition(new Path(path), true, Set.empty[Path], Map.empty, timeZone) + parsePartition(new Path(path), true, Set.empty[Path], Map.empty, true, timeZone) }.getMessage assert(message.contains(expected)) @@ -242,6 +248,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha typeInference = true, basePaths = Set(new Path("file://path/a=10")), Map.empty, + true, timeZone = timeZone)._1 assert(partitionSpec1.isEmpty) @@ -252,6 +259,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha typeInference = true, basePaths = Set(new Path("file://path")), Map.empty, + true, timeZone = timeZone)._1 assert(partitionSpec2 == @@ -272,6 +280,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha rootPaths, None, true, + true, timeZoneId) assert(actualSpec.partitionColumns === spec.partitionColumns) 
assert(actualSpec.partitions.length === spec.partitions.length) @@ -384,7 +393,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha test("parse partitions with type inference disabled") { def check(paths: Seq[String], spec: PartitionSpec): Unit = { val actualSpec = - parsePartitions(paths.map(new Path(_)), false, Set.empty[Path], None, true, timeZoneId) + parsePartitions(paths.map(new Path(_)), false, Set.empty[Path], None, + true, true, timeZoneId) assert(actualSpec === spec) } From 477226520358f0cc47d5ea255ad84d3c13f6d77d Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 6 Dec 2018 20:50:57 -0800 Subject: [PATCH 0083/1072] [SPARK-26298][BUILD] Upgrade Janino to 3.0.11 ## What changes were proposed in this pull request? This PR aims to upgrade Janino compiler to the latest version 3.0.11. The followings are the changes from the [release note](http://janino-compiler.github.io/janino/changelog.html). - Script with many "helper" variables. - Java 9+ compatibility - Compilation Error Messages Generated by JDK. - Added experimental support for the "StackMapFrame" attribute; not active yet. - Make Unparser more flexible. - Fixed NPEs in various "toString()" methods. - Optimize static method invocation with rvalue target expression. - Added all missing "ClassFile.getConstant*Info()" methods, removing the necessity for many type casts. ## How was this patch tested? Pass the Jenkins with the existing tests. Closes #23250 from dongjoon-hyun/SPARK-26298. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7 | 4 ++-- dev/deps/spark-deps-hadoop-3.1 | 4 ++-- pom.xml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index ec7c304c9e36..d250d5205586 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -34,7 +34,7 @@ commons-beanutils-core-1.8.0.jar commons-cli-1.2.jar commons-codec-1.10.jar commons-collections-3.2.2.jar -commons-compiler-3.0.10.jar +commons-compiler-3.0.11.jar commons-compress-1.8.1.jar commons-configuration-1.6.jar commons-crypto-1.0.0.jar @@ -98,7 +98,7 @@ jackson-module-jaxb-annotations-2.9.6.jar jackson-module-paranamer-2.9.6.jar jackson-module-scala_2.12-2.9.6.jar jackson-xc-1.9.13.jar -janino-3.0.10.jar +janino-3.0.11.jar javassist-3.18.1-GA.jar javax.annotation-api-1.2.jar javax.inject-1.jar diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1 index 811febf22940..347503ace557 100644 --- a/dev/deps/spark-deps-hadoop-3.1 +++ b/dev/deps/spark-deps-hadoop-3.1 @@ -31,7 +31,7 @@ commons-beanutils-1.9.3.jar commons-cli-1.2.jar commons-codec-1.10.jar commons-collections-3.2.2.jar -commons-compiler-3.0.10.jar +commons-compiler-3.0.11.jar commons-compress-1.8.1.jar commons-configuration2-2.1.1.jar commons-crypto-1.0.0.jar @@ -97,7 +97,7 @@ jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations-2.9.6.jar jackson-module-paranamer-2.9.6.jar jackson-module-scala_2.12-2.9.6.jar -janino-3.0.10.jar +janino-3.0.11.jar javassist-3.18.1-GA.jar javax.annotation-api-1.2.jar javax.inject-1.jar diff --git a/pom.xml b/pom.xml index 61321a145070..15e5c1829040 100644 --- a/pom.xml +++ b/pom.xml @@ -173,7 +173,7 @@ 3.8.1 3.2.10 - 3.0.10 + 3.0.11 2.22.2 2.9.3 3.5.2 From 1ab3d3e474ce2e36d58aea8ad09fb61f0c73e5c5 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 7 Dec 2018 07:55:54 -0800 Subject: [PATCH 0084/1072] [SPARK-26060][SQL][FOLLOW-UP] Rename the config name. 
## What changes were proposed in this pull request? This is a follow-up of #23031 to rename the config name to `spark.sql.legacy.setCommandRejectsSparkCoreConfs`. ## How was this patch tested? Existing tests. Closes #23245 from ueshin/issues/SPARK-26060/rename_config. Authored-by: Takuya UESHIN Signed-off-by: Dongjoon Hyun --- docs/sql-migration-guide-upgrade.md | 2 +- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 7 ++++--- .../main/scala/org/apache/spark/sql/RuntimeConfig.scala | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 3638b0873aa4..67c30fb941ec 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -31,7 +31,7 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, partition column value is converted as null if it can't be casted to corresponding user provided schema. Since 3.0, partition column value is validated with user provided schema. An exception is thrown if the validation fails. You can disable such validation by setting `spark.sql.sources.validatePartitionColumns` to `false`. - - In Spark version 2.4 and earlier, the `SET` command works without any warnings even if the specified key is for `SparkConf` entries and it has no effect because the command does not update `SparkConf`, but the behavior might confuse users. Since 3.0, the command fails if a `SparkConf` key is used. You can disable such a check by setting `spark.sql.legacy.execution.setCommandRejectsSparkConfs` to `false`. + - In Spark version 2.4 and earlier, the `SET` command works without any warnings even if the specified key is for `SparkConf` entries and it has no effect because the command does not update `SparkConf`, but the behavior might confuse users. Since 3.0, the command fails if a `SparkConf` key is used. You can disable such a check by setting `spark.sql.legacy.setCommandRejectsSparkCoreConfs` to `false`. - Spark applications which are built with Spark version 2.4 and prior, and call methods of `UserDefinedFunction`, need to be re-compiled with Spark 3.0, as they are not binary compatible with Spark 3.0. 
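To make the renamed flag concrete, a minimal sketch of the user-facing behaviour follows; the key shown is just one example of a registered `SparkConf` entry:

```
// Spark 3.0 default: SET rejects keys that are registered SparkConf entries.
spark.sql("SET spark.executor.memory=4g")
// => AnalysisException: Cannot modify the value of a Spark config: spark.executor.memory

// Restore the 2.4 behaviour (the key is accepted again, but still has no effect):
spark.conf.set("spark.sql.legacy.setCommandRejectsSparkCoreConfs", "false")
spark.sql("SET spark.executor.memory=4g")
```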
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 6857b8de7975..86e068bf632b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1621,8 +1621,8 @@ object SQLConf { .intConf .createWithDefault(25) - val SET_COMMAND_REJECTS_SPARK_CONFS = - buildConf("spark.sql.legacy.execution.setCommandRejectsSparkConfs") + val SET_COMMAND_REJECTS_SPARK_CORE_CONFS = + buildConf("spark.sql.legacy.setCommandRejectsSparkCoreConfs") .internal() .doc("If it is set to true, SET command will fail when the key is registered as " + "a SparkConf entry.") @@ -2057,7 +2057,8 @@ class SQLConf extends Serializable with Logging { def maxToStringFields: Int = getConf(SQLConf.MAX_TO_STRING_FIELDS) - def setCommandRejectsSparkConfs: Boolean = getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CONFS) + def setCommandRejectsSparkCoreConfs: Boolean = + getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CORE_CONFS) def legacyTimeParserEnabled: Boolean = getConf(SQLConf.LEGACY_TIME_PARSER_ENABLED) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala index d83a01ff9ea6..0f5aab7f47d0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala @@ -153,7 +153,7 @@ class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) { if (SQLConf.staticConfKeys.contains(key)) { throw new AnalysisException(s"Cannot modify the value of a static config: $key") } - if (sqlConf.setCommandRejectsSparkConfs && + if (sqlConf.setCommandRejectsSparkCoreConfs && ConfigEntry.findEntry(key) != null && !SQLConf.sqlConfEntries.containsKey(key)) { throw new AnalysisException(s"Cannot modify the value of a Spark config: $key") } From 543577a1e8c0904048b73008fa7c4cee33f69894 Mon Sep 17 00:00:00 2001 From: Sahil Takiar Date: Fri, 7 Dec 2018 10:33:42 -0800 Subject: [PATCH 0085/1072] [SPARK-24243][CORE] Expose exceptions from InProcessAppHandle Adds a new method to SparkAppHandle called getError which returns the exception (if present) that caused the underlying Spark app to fail. New tests added to SparkLauncherSuite for the new method. Closes #21849 Closes #23221 from vanzin/SPARK-24243. 
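A minimal caller-side sketch of the new method (the application resource and main class below are placeholders):

```
import org.apache.spark.launcher.{SparkAppHandle, SparkLauncher}

val handle = new SparkLauncher()
  .setMaster("local")
  .setAppResource("/path/to/app.jar")   // placeholder
  .setMainClass("com.example.Main")     // placeholder
  .startApplication()

// Once the handle reaches a final state, the failure cause (if any) is exposed.
if (handle.getState == SparkAppHandle.State.FAILED) {
  handle.getError.ifPresent(t => println(s"Application failed: ${t.getMessage}"))
}
```

For the child-process launcher the returned throwable is reconstructed from the spark-submit logs on a best-effort basis, so only its message is meaningful; the in-process launcher exposes the original exception.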
Signed-off-by: Marcelo Vanzin --- .../spark/launcher/SparkLauncherSuite.java | 102 ++++++++++++++++-- .../spark/launcher/ChildProcAppHandle.java | 20 +++- .../spark/launcher/InProcessAppHandle.java | 13 +++ .../spark/launcher/OutputRedirector.java | 25 +++++ .../apache/spark/launcher/SparkAppHandle.java | 8 ++ project/MimaExcludes.scala | 3 + 6 files changed, 159 insertions(+), 12 deletions(-) diff --git a/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java b/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java index 6a1a38c1a54f..773c390175b6 100644 --- a/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java +++ b/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java @@ -41,6 +41,8 @@ public class SparkLauncherSuite extends BaseSuite { private static final NamedThreadFactory TF = new NamedThreadFactory("SparkLauncherSuite-%d"); + private static final String EXCEPTION_MESSAGE = "dummy-exception"; + private static final RuntimeException DUMMY_EXCEPTION = new RuntimeException(EXCEPTION_MESSAGE); private final SparkLauncher launcher = new SparkLauncher(); @@ -130,17 +132,8 @@ public void testInProcessLauncher() throws Exception { try { inProcessLauncherTestImpl(); } finally { - Properties p = new Properties(); - for (Map.Entry e : properties.entrySet()) { - p.put(e.getKey(), e.getValue()); - } - System.setProperties(p); - // Here DAGScheduler is stopped, while SparkContext.clearActiveContext may not be called yet. - // Wait for a reasonable amount of time to avoid creating two active SparkContext in JVM. - // See SPARK-23019 and SparkContext.stop() for details. - eventually(Duration.ofSeconds(5), Duration.ofMillis(10), () -> { - assertTrue("SparkContext is still alive.", SparkContext$.MODULE$.getActive().isEmpty()); - }); + restoreSystemProperties(properties); + waitForSparkContextShutdown(); } } @@ -227,6 +220,82 @@ public void testInProcessLauncherDoesNotKillJvm() throws Exception { assertEquals(SparkAppHandle.State.LOST, handle.getState()); } + @Test + public void testInProcessLauncherGetError() throws Exception { + // Because this test runs SparkLauncher in process and in client mode, it pollutes the system + // properties, and that can cause test failures down the test pipeline. So restore the original + // system properties after this test runs. 
+ Map properties = new HashMap<>(System.getProperties()); + + SparkAppHandle handle = null; + try { + handle = new InProcessLauncher() + .setMaster("local") + .setAppResource(SparkLauncher.NO_RESOURCE) + .setMainClass(ErrorInProcessTestApp.class.getName()) + .addAppArgs("hello") + .startApplication(); + + final SparkAppHandle _handle = handle; + eventually(Duration.ofSeconds(60), Duration.ofMillis(1000), () -> { + assertEquals(SparkAppHandle.State.FAILED, _handle.getState()); + }); + + assertNotNull(handle.getError()); + assertTrue(handle.getError().isPresent()); + assertSame(handle.getError().get(), DUMMY_EXCEPTION); + } finally { + if (handle != null) { + handle.kill(); + } + restoreSystemProperties(properties); + waitForSparkContextShutdown(); + } + } + + @Test + public void testSparkLauncherGetError() throws Exception { + SparkAppHandle handle = null; + try { + handle = new SparkLauncher() + .setMaster("local") + .setAppResource(SparkLauncher.NO_RESOURCE) + .setMainClass(ErrorInProcessTestApp.class.getName()) + .addAppArgs("hello") + .startApplication(); + + final SparkAppHandle _handle = handle; + eventually(Duration.ofSeconds(60), Duration.ofMillis(1000), () -> { + assertEquals(SparkAppHandle.State.FAILED, _handle.getState()); + }); + + assertNotNull(handle.getError()); + assertTrue(handle.getError().isPresent()); + assertTrue(handle.getError().get().getMessage().contains(EXCEPTION_MESSAGE)); + } finally { + if (handle != null) { + handle.kill(); + } + } + } + + private void restoreSystemProperties(Map properties) { + Properties p = new Properties(); + for (Map.Entry e : properties.entrySet()) { + p.put(e.getKey(), e.getValue()); + } + System.setProperties(p); + } + + private void waitForSparkContextShutdown() throws Exception { + // Here DAGScheduler is stopped, while SparkContext.clearActiveContext may not be called yet. + // Wait for a reasonable amount of time to avoid creating two active SparkContext in JVM. + // See SPARK-23019 and SparkContext.stop() for details. 
+ eventually(Duration.ofSeconds(5), Duration.ofMillis(10), () -> { + assertTrue("SparkContext is still alive.", SparkContext$.MODULE$.getActive().isEmpty()); + }); + } + public static class SparkLauncherTestApp { public static void main(String[] args) throws Exception { @@ -264,4 +333,15 @@ public static void main(String[] args) throws Exception { } + /** + * Similar to {@link InProcessTestApp} except it throws an exception + */ + public static class ErrorInProcessTestApp { + + public static void main(String[] args) { + assertNotEquals(0, args.length); + assertEquals(args[0], "hello"); + throw DUMMY_EXCEPTION; + } + } } diff --git a/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java b/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java index 5609f8492f4f..7dfcf0e66734 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java +++ b/launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java @@ -18,6 +18,7 @@ package org.apache.spark.launcher; import java.io.InputStream; +import java.util.Optional; import java.util.logging.Level; import java.util.logging.Logger; @@ -29,7 +30,7 @@ class ChildProcAppHandle extends AbstractAppHandle { private static final Logger LOG = Logger.getLogger(ChildProcAppHandle.class.getName()); private volatile Process childProc; - private OutputRedirector redirector; + private volatile OutputRedirector redirector; ChildProcAppHandle(LauncherServer server) { super(server); @@ -46,6 +47,23 @@ public synchronized void disconnect() { } } + /** + * Parses the logs of {@code spark-submit} and returns the last exception thrown. + *

+ * Since {@link SparkLauncher} runs {@code spark-submit} in a sub-process, it's difficult to + * accurately retrieve the full {@link Throwable} from the {@code spark-submit} process. + * This method parses the logs of the sub-process and provides a best-effort attempt at + * returning the last exception thrown by the {@code spark-submit} process. Only the exception + * message is parsed, the associated stacktrace is meaningless. + * + * @return an {@link Optional} containing a {@link RuntimeException} with the parsed + * exception, otherwise returns a {@link Optional#EMPTY} + */ + @Override + public Optional getError() { + return redirector != null ? Optional.ofNullable(redirector.getError()) : Optional.empty(); + } + @Override public synchronized void kill() { if (!isDisposed()) { diff --git a/launcher/src/main/java/org/apache/spark/launcher/InProcessAppHandle.java b/launcher/src/main/java/org/apache/spark/launcher/InProcessAppHandle.java index 15fbca0facef..ba09050c756d 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/InProcessAppHandle.java +++ b/launcher/src/main/java/org/apache/spark/launcher/InProcessAppHandle.java @@ -17,7 +17,9 @@ package org.apache.spark.launcher; +import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; import java.util.logging.Level; import java.util.logging.Logger; @@ -31,6 +33,8 @@ class InProcessAppHandle extends AbstractAppHandle { // Avoid really long thread names. private static final int MAX_APP_NAME_LEN = 16; + private volatile Throwable error; + private Thread app; InProcessAppHandle(LauncherServer server) { @@ -51,6 +55,11 @@ public synchronized void kill() { } } + @Override + public Optional getError() { + return Optional.ofNullable(error); + } + synchronized void start(String appName, Method main, String[] args) { CommandBuilderUtils.checkState(app == null, "Handle already started."); @@ -62,7 +71,11 @@ synchronized void start(String appName, Method main, String[] args) { try { main.invoke(null, (Object) args); } catch (Throwable t) { + if (t instanceof InvocationTargetException) { + t = t.getCause(); + } LOG.log(Level.WARNING, "Application failed with exception.", t); + error = t; setState(State.FAILED); } diff --git a/launcher/src/main/java/org/apache/spark/launcher/OutputRedirector.java b/launcher/src/main/java/org/apache/spark/launcher/OutputRedirector.java index 6f4b0bb38e03..0f097f831392 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/OutputRedirector.java +++ b/launcher/src/main/java/org/apache/spark/launcher/OutputRedirector.java @@ -37,6 +37,7 @@ class OutputRedirector { private final ChildProcAppHandle callback; private volatile boolean active; + private volatile Throwable error; OutputRedirector(InputStream in, String loggerName, ThreadFactory tf) { this(in, loggerName, tf, null); @@ -61,6 +62,10 @@ private void redirect() { while ((line = reader.readLine()) != null) { if (active) { sink.info(line.replaceFirst("\\s*$", "")); + if ((containsIgnoreCase(line, "Error") || containsIgnoreCase(line, "Exception")) && + !line.contains("at ")) { + error = new RuntimeException(line); + } } } } catch (IOException e) { @@ -85,4 +90,24 @@ boolean isAlive() { return thread.isAlive(); } + Throwable getError() { + return error; + } + + /** + * Copied from Apache Commons Lang {@code StringUtils#containsIgnoreCase(String, String)} + */ + private static boolean containsIgnoreCase(String str, String searchStr) { + if (str == null 
|| searchStr == null) { + return false; + } + int len = searchStr.length(); + int max = str.length() - len; + for (int i = 0; i <= max; i++) { + if (str.regionMatches(true, i, searchStr, 0, len)) { + return true; + } + } + return false; + } } diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkAppHandle.java b/launcher/src/main/java/org/apache/spark/launcher/SparkAppHandle.java index cefb4d1a95fb..afec270e2b11 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkAppHandle.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkAppHandle.java @@ -17,6 +17,8 @@ package org.apache.spark.launcher; +import java.util.Optional; + /** * A handle to a running Spark application. *

@@ -100,6 +102,12 @@ public boolean isFinal() { */ void disconnect(); + /** + * If the application failed due to an error, return the underlying error. If the app + * succeeded, this method returns an empty {@link Optional}. + */ + Optional<Throwable> getError(); + /** * Listener for updates to a handle's state. The callbacks do not receive information about * what exactly has changed, just that an update has occurred. diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 1c83cf5860c5..4eeebb805070 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -36,6 +36,9 @@ object MimaExcludes { // Exclude rules for 3.0.x lazy val v30excludes = v24excludes ++ Seq( + // [SPARK-24243][CORE] Expose exceptions from InProcessAppHandle + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.launcher.SparkAppHandle.getError"), + // [SPARK-25867] Remove KMeans computeCost ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.clustering.KMeansModel.computeCost"), From 9b7679a97ed2622081390301af8735e62055492d Mon Sep 17 00:00:00 2001 From: 10087686 Date: Fri, 7 Dec 2018 14:11:25 -0600 Subject: [PATCH 0086/1072] [SPARK-26294][CORE] Delete Unnecessary If statement ## What changes were proposed in this pull request? Delete an unnecessary `if` statement: `spill()` already returns 0L earlier when `inMemSorter` is null or `inMemSorter.numRecords() <= 0`, so by the time this check runs the record count is always greater than zero and the condition can never be false. ................... if (inMemSorter == null || inMemSorter.numRecords() <= 0) { return 0L; } .................... if (inMemSorter.numRecords() > 0) { ..................... } ## How was this patch tested? Existing tests. Closes #23247 from wangjiaochun/inMemSorter. Authored-by: 10087686 Signed-off-by: Sean Owen --- .../unsafe/sort/UnsafeExternalSorter.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 5056652a2420..af5a934b7da6 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -213,14 +213,12 @@ public long spill(long size, MemoryConsumer trigger) throws IOException { spillWriters.size() > 1 ? " times" : " time"); ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); - // We only write out contents of the inMemSorter if it is not empty.
- if (inMemSorter.numRecords() > 0) { - final UnsafeSorterSpillWriter spillWriter = - new UnsafeSorterSpillWriter(blockManager, fileBufferSizeBytes, writeMetrics, - inMemSorter.numRecords()); - spillWriters.add(spillWriter); - spillIterator(inMemSorter.getSortedIterator(), spillWriter); - } + + final UnsafeSorterSpillWriter spillWriter = + new UnsafeSorterSpillWriter(blockManager, fileBufferSizeBytes, writeMetrics, + inMemSorter.numRecords()); + spillWriters.add(spillWriter); + spillIterator(inMemSorter.getSortedIterator(), spillWriter); final long spillSize = freeMemory(); // Note that this is more-or-less going to be a multiple of the page size, so wasted space in From bd00f10773f3a08e50ecd06c7e70d9b38094a252 Mon Sep 17 00:00:00 2001 From: dima-asana <42555784+dima-asana@users.noreply.github.com> Date: Fri, 7 Dec 2018 14:14:43 -0600 Subject: [PATCH 0087/1072] [MINOR][SQL][DOC] Correct parquet nullability documentation ## What changes were proposed in this pull request? Parquet files appear to have nullability info when being written, not being read. ## How was this patch tested? Some test code: (running spark 2.3, but the relevant code in DataSource looks identical on master) case class NullTest(bo: Boolean, opbol: Option[Boolean]) val testDf = spark.createDataFrame(Seq(NullTest(true, Some(false)))) defined class NullTest testDf: org.apache.spark.sql.DataFrame = [bo: boolean, opbol: boolean] testDf.write.parquet("s3://asana-stats/tmp_dima/parquet_check_schema") spark.read.parquet("s3://asana-stats/tmp_dima/parquet_check_schema/part-00000-b1bf4a19-d9fe-4ece-a2b4-9bbceb490857-c000.snappy.parquet4").printSchema() root |-- bo: boolean (nullable = true) |-- opbol: boolean (nullable = true) Meanwhile, the parquet file formed does have nullable info: []batchprod-report000:/tmp/dimakamalov-batch$ aws s3 ls s3://asana-stats/tmp_dima/parquet_check_schema/ 2018-10-17 21:03:52 0 _SUCCESS 2018-10-17 21:03:50 504 part-00000-b1bf4a19-d9fe-4ece-a2b4-9bbceb490857-c000.snappy.parquet []batchprod-report000:/tmp/dimakamalov-batch$ aws s3 cp s3://asana-stats/tmp_dima/parquet_check_schema/part-00000-b1bf4a19-d9fe-4ece-a2b4-9bbceb490857-c000.snappy.parquet . download: s3://asana-stats/tmp_dima/parquet_check_schema/part-00000-b1bf4a19-d9fe-4ece-a2b4-9bbceb490857-c000.snappy.parquet to ./part-00000-b1bf4a19-d9fe-4ece-a2b4-9bbceb490857-c000.snappy.parquet []batchprod-report000:/tmp/dimakamalov-batch$ java -jar parquet-tools-1.8.2.jar schema part-00000-b1bf4a19-d9fe-4ece-a2b4-9bbceb490857-c000.snappy.parquet message spark_schema { required boolean bo; optional boolean opbol; } Closes #22759 from dima-asana/dima-asana-nullable-parquet-doc. Authored-by: dima-asana <42555784+dima-asana@users.noreply.github.com> Signed-off-by: Sean Owen --- docs/sql-data-sources-parquet.md | 2 +- .../sql/test/DataFrameReaderWriterSuite.scala | 44 +++++++++++++++++-- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/docs/sql-data-sources-parquet.md b/docs/sql-data-sources-parquet.md index 4fed3eaf83e5..dcd293651846 100644 --- a/docs/sql-data-sources-parquet.md +++ b/docs/sql-data-sources-parquet.md @@ -9,7 +9,7 @@ displayTitle: Parquet Files [Parquet](http://parquet.io) is a columnar format that is supported by many other data processing systems. Spark SQL provides support for both reading and writing Parquet files that automatically preserves the schema -of the original data. When writing Parquet files, all columns are automatically converted to be nullable for +of the original data. 
When reading Parquet files, all columns are automatically converted to be nullable for compatibility reasons. ### Loading Data Programmatically diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index 237872585e11..e45ab19aadbf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -23,6 +23,13 @@ import java.util.concurrent.ConcurrentLinkedQueue import scala.collection.JavaConverters._ +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.parquet.hadoop.ParquetFileReader +import org.apache.parquet.hadoop.util.HadoopInputFile +import org.apache.parquet.schema.PrimitiveType +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName +import org.apache.parquet.schema.Type.Repetition import org.scalatest.BeforeAndAfter import org.apache.spark.SparkContext @@ -31,6 +38,7 @@ import org.apache.spark.internal.io.HadoopMapReduceCommitProtocol import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ @@ -522,11 +530,12 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be Seq("json", "orc", "parquet", "csv").foreach { format => val schema = StructType( StructField("cl1", IntegerType, nullable = false).withComment("test") :: - StructField("cl2", IntegerType, nullable = true) :: - StructField("cl3", IntegerType, nullable = true) :: Nil) + StructField("cl2", IntegerType, nullable = true) :: + StructField("cl3", IntegerType, nullable = true) :: Nil) val row = Row(3, null, 4) val df = spark.createDataFrame(sparkContext.parallelize(row :: Nil), schema) + // if we write and then read, the read will enforce schema to be nullable val tableName = "tab" withTable(tableName) { df.write.format(format).mode("overwrite").saveAsTable(tableName) @@ -536,12 +545,41 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be Row("cl1", "test") :: Nil) // Verify the schema val expectedFields = schema.fields.map(f => f.copy(nullable = true)) - assert(spark.table(tableName).schema == schema.copy(fields = expectedFields)) + assert(spark.table(tableName).schema === schema.copy(fields = expectedFields)) } } } } + test("parquet - column nullability -- write only") { + val schema = StructType( + StructField("cl1", IntegerType, nullable = false) :: + StructField("cl2", IntegerType, nullable = true) :: Nil) + val row = Row(3, 4) + val df = spark.createDataFrame(sparkContext.parallelize(row :: Nil), schema) + + withTempPath { dir => + val path = dir.getAbsolutePath + df.write.mode("overwrite").parquet(path) + val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0) + + val hadoopInputFile = HadoopInputFile.fromPath(new Path(file), new Configuration()) + val f = ParquetFileReader.open(hadoopInputFile) + val parquetSchema = f.getFileMetaData.getSchema.getColumns.asScala + .map(_.getPrimitiveType) + f.close() + + // the write keeps nullable info from the schema + val expectedParquetSchema = Seq( + new PrimitiveType(Repetition.REQUIRED, 
PrimitiveTypeName.INT32, "cl1"), + new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "cl2") + ) + + assert (expectedParquetSchema === parquetSchema) + } + + } + test("SPARK-17230: write out results of decimal calculation") { val df = spark.range(99, 101) .selectExpr("id", "cast(id as long) * cast('1.0' as decimal(38, 18)) as num") From 3b8ae23735f5b29db95516662190f606edc51fd7 Mon Sep 17 00:00:00 2001 From: Shahid Date: Fri, 7 Dec 2018 14:31:35 -0600 Subject: [PATCH 0088/1072] [SPARK-26196][SPARK-26281][WEBUI] Total tasks title in the stage page is incorrect when there are failed or killed tasks and update duration metrics ## What changes were proposed in this pull request? This PR fixes 3 issues 1) Total tasks message in the tasks table is incorrect, when there are failed or killed tasks 2) Sorting of the "Duration" column is not correct 3) Duration in the aggregated tasks summary table and the tasks table and not matching. Total tasks = numCompleteTasks + numActiveTasks + numKilledTasks + numFailedTasks; Corrected the duration metrics in the tasks table as executorRunTime based on the PR https://github.com/apache/spark/pull/23081 ## How was this patch tested? test step: 1) ``` bin/spark-shell scala > sc.parallelize(1 to 100, 10).map{ x => throw new RuntimeException("Bad executor")}.collect() ``` ![screenshot from 2018-11-28 07-26-00](https://user-images.githubusercontent.com/23054875/49123523-e2691880-f2de-11e8-9c16-60d1865e6e77.png) After patch: ![screenshot from 2018-11-28 07-24-31](https://user-images.githubusercontent.com/23054875/49123525-e432dc00-f2de-11e8-89ca-4a53e19c9c18.png) 2) Duration metrics: Before patch: ![screenshot from 2018-12-06 03-25-14](https://user-images.githubusercontent.com/23054875/49546591-9e8d9900-f906-11e8-8a0b-157742c47655.png) After patch: ![screenshot from 2018-12-06 03-23-14](https://user-images.githubusercontent.com/23054875/49546589-9cc3d580-f906-11e8-827f-52ef8ffdeaec.png) Closes #23160 from shahidki31/totalTasks. Authored-by: Shahid Signed-off-by: Sean Owen --- .../resources/org/apache/spark/ui/static/stagepage.js | 9 +++++---- .../org/apache/spark/status/api/v1/StagesResource.scala | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 564467487e84..08de2b0fee03 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -616,7 +616,8 @@ $(document).ready(function () { $("#accumulator-table").DataTable(accumulatorConf); // building tasks table that uses server side functionality - var totalTasksToShow = responseBody.numCompleteTasks + responseBody.numActiveTasks; + var totalTasksToShow = responseBody.numCompleteTasks + responseBody.numActiveTasks + + responseBody.numKilledTasks + responseBody.numFailedTasks; var taskTable = "#active-tasks-table"; var taskConf = { "serverSide": true, @@ -667,8 +668,8 @@ $(document).ready(function () { {data : "launchTime", name: "Launch Time", render: formatDate}, { data : function (row, type) { - if (row.duration) { - return type === 'display' ? formatDuration(row.duration) : row.duration; + if (row.taskMetrics && row.taskMetrics.executorRunTime) { + return type === 'display' ? 
formatDuration(row.taskMetrics.executorRunTime) : row.taskMetrics.executorRunTime; } else { return ""; } @@ -927,7 +928,7 @@ $(document).ready(function () { // title number and toggle list $("#summaryMetricsTitle").html("Summary Metrics for " + "" + responseBody.numCompleteTasks + " Completed Tasks" + ""); - $("#tasksTitle").html("Task (" + totalTasksToShow + ")"); + $("#tasksTitle").html("Tasks (" + totalTasksToShow + ")"); // hide or show the accumulate update table if (accumulatorTable.length == 0) { diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala index f81892734c2d..9d1d66a0e15a 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala @@ -210,7 +210,6 @@ private[v1] class StagesResource extends BaseAppResource { (containsValue(f.taskId) || containsValue(f.index) || containsValue(f.attempt) || containsValue(f.launchTime) || containsValue(f.resultFetchStart.getOrElse(defaultOptionString)) - || containsValue(f.duration.getOrElse(defaultOptionString)) || containsValue(f.executorId) || containsValue(f.host) || containsValue(f.status) || containsValue(f.taskLocality) || containsValue(f.speculative) || containsValue(f.errorMessage.getOrElse(defaultOptionString)) From 20278e719e28fc5d7a8069e0498a8df143ecee90 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 7 Dec 2018 13:53:35 -0800 Subject: [PATCH 0089/1072] [SPARK-24333][ML][PYTHON] Add fit with validation set to spark.ml GBT: Python API ## What changes were proposed in this pull request? Add validationIndicatorCol and validationTol to GBT Python. ## How was this patch tested? Add test in doctest to test the new API. Closes #21465 from huaxingao/spark-24333. Authored-by: Huaxin Gao Signed-off-by: Bryan Cutler --- python/pyspark/ml/classification.py | 81 ++++++++------ .../ml/param/_shared_params_code_gen.py | 5 +- python/pyspark/ml/param/shared.py | 71 ++++++++----- python/pyspark/ml/regression.py | 100 +++++++++++++----- 4 files changed, 169 insertions(+), 88 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index ce028512357f..6ddfce95a3d4 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -23,7 +23,7 @@ from pyspark.ml import Estimator, Model from pyspark.ml.param.shared import * from pyspark.ml.regression import DecisionTreeModel, DecisionTreeRegressionModel, \ - RandomForestParams, TreeEnsembleModel, TreeEnsembleParams + GBTParams, HasVarianceImpurity, RandomForestParams, TreeEnsembleModel, TreeEnsembleParams from pyspark.ml.util import * from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams from pyspark.ml.wrapper import JavaWrapper @@ -895,15 +895,6 @@ def getImpurity(self): return self.getOrDefault(self.impurity) -class GBTParams(TreeEnsembleParams): - """ - Private class to track supported GBT params. - - .. versionadded:: 1.4.0 - """ - supportedLossTypes = ["logistic"] - - @inherit_doc class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol, HasRawPredictionCol, DecisionTreeParams, @@ -1174,9 +1165,31 @@ def trees(self): return [DecisionTreeClassificationModel(m) for m in list(self._call_java("trees"))] +class GBTClassifierParams(GBTParams, HasVarianceImpurity): + """ + Private class to track supported GBTClassifier params. + + .. 
versionadded:: 3.0.0 + """ + + supportedLossTypes = ["logistic"] + + lossType = Param(Params._dummy(), "lossType", + "Loss function which GBT tries to minimize (case-insensitive). " + + "Supported options: " + ", ".join(supportedLossTypes), + typeConverter=TypeConverters.toString) + + @since("1.4.0") + def getLossType(self): + """ + Gets the value of lossType or its default value. + """ + return self.getOrDefault(self.lossType) + + @inherit_doc -class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, - GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable, +class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, + GBTClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable): """ `Gradient-Boosted Trees (GBTs) `_ @@ -1242,32 +1255,28 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol [0.25..., 0.23..., 0.21..., 0.19..., 0.18...] >>> model.numClasses 2 + >>> gbt = gbt.setValidationIndicatorCol("validationIndicator") + >>> gbt.getValidationIndicatorCol() + 'validationIndicator' + >>> gbt.getValidationTol() + 0.01 .. versionadded:: 1.4.0 """ - lossType = Param(Params._dummy(), "lossType", - "Loss function which GBT tries to minimize (case-insensitive). " + - "Supported options: " + ", ".join(GBTParams.supportedLossTypes), - typeConverter=TypeConverters.toString) - - stepSize = Param(Params._dummy(), "stepSize", - "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " + - "the contribution of each estimator.", - typeConverter=TypeConverters.toFloat) - @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", - maxIter=20, stepSize=0.1, seed=None, subsamplingRate=1.0, - featureSubsetStrategy="all"): + maxIter=20, stepSize=0.1, seed=None, subsamplingRate=1.0, impurity="variance", + featureSubsetStrategy="all", validationTol=0.01, validationIndicatorCol=None): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ lossType="logistic", maxIter=20, stepSize=0.1, seed=None, subsamplingRate=1.0, \ - featureSubsetStrategy="all") + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, \ + validationIndicatorCol=None) """ super(GBTClassifier, self).__init__() self._java_obj = self._new_java_obj( @@ -1275,7 +1284,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", maxIter=20, stepSize=0.1, subsamplingRate=1.0, - featureSubsetStrategy="all") + impurity="variance", featureSubsetStrategy="all", validationTol=0.01) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1285,13 +1294,15 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", maxIter=20, stepSize=0.1, seed=None, subsamplingRate=1.0, - featureSubsetStrategy="all"): + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, + 
validationIndicatorCol=None): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ lossType="logistic", maxIter=20, stepSize=0.1, seed=None, subsamplingRate=1.0, \ - featureSubsetStrategy="all") + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, \ + validationIndicatorCol=None) Sets params for Gradient Boosted Tree Classification. """ kwargs = self._input_kwargs @@ -1307,13 +1318,6 @@ def setLossType(self, value): """ return self._set(lossType=value) - @since("1.4.0") - def getLossType(self): - """ - Gets the value of lossType or its default value. - """ - return self.getOrDefault(self.lossType) - @since("2.4.0") def setFeatureSubsetStrategy(self, value): """ @@ -1321,6 +1325,13 @@ def setFeatureSubsetStrategy(self, value): """ return self._set(featureSubsetStrategy=value) + @since("3.0.0") + def setValidationIndicatorCol(self, value): + """ + Sets the value of :py:attr:`validationIndicatorCol`. + """ + return self._set(validationIndicatorCol=value) + class GBTClassificationModel(TreeEnsembleModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable): diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index e45ba840b412..1b0c8c5d28b7 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -164,7 +164,10 @@ def get$Name(self): "False", "TypeConverters.toBoolean"), ("loss", "the loss function to be optimized.", None, "TypeConverters.toString"), ("distanceMeasure", "the distance measure. Supported options: 'euclidean' and 'cosine'.", - "'euclidean'", "TypeConverters.toString")] + "'euclidean'", "TypeConverters.toString"), + ("validationIndicatorCol", "name of the column that indicates whether each row is for " + + "training or for validation. False indicates training; true indicates validation.", + None, "TypeConverters.toString")] code = [] for name, doc, defaultValueStr, typeConverter in shared: diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 618f5bf0a810..6405b9fce7ef 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -702,6 +702,53 @@ def getLoss(self): return self.getOrDefault(self.loss) +class HasDistanceMeasure(Params): + """ + Mixin for param distanceMeasure: the distance measure. Supported options: 'euclidean' and 'cosine'. + """ + + distanceMeasure = Param(Params._dummy(), "distanceMeasure", "the distance measure. Supported options: 'euclidean' and 'cosine'.", typeConverter=TypeConverters.toString) + + def __init__(self): + super(HasDistanceMeasure, self).__init__() + self._setDefault(distanceMeasure='euclidean') + + def setDistanceMeasure(self, value): + """ + Sets the value of :py:attr:`distanceMeasure`. + """ + return self._set(distanceMeasure=value) + + def getDistanceMeasure(self): + """ + Gets the value of distanceMeasure or its default value. + """ + return self.getOrDefault(self.distanceMeasure) + + +class HasValidationIndicatorCol(Params): + """ + Mixin for param validationIndicatorCol: name of the column that indicates whether each row is for training or for validation. False indicates training; true indicates validation. 
+ """ + + validationIndicatorCol = Param(Params._dummy(), "validationIndicatorCol", "name of the column that indicates whether each row is for training or for validation. False indicates training; true indicates validation.", typeConverter=TypeConverters.toString) + + def __init__(self): + super(HasValidationIndicatorCol, self).__init__() + + def setValidationIndicatorCol(self, value): + """ + Sets the value of :py:attr:`validationIndicatorCol`. + """ + return self._set(validationIndicatorCol=value) + + def getValidationIndicatorCol(self): + """ + Gets the value of validationIndicatorCol or its default value. + """ + return self.getOrDefault(self.validationIndicatorCol) + + class DecisionTreeParams(Params): """ Mixin for Decision Tree parameters. @@ -790,27 +837,3 @@ def getCacheNodeIds(self): """ return self.getOrDefault(self.cacheNodeIds) - -class HasDistanceMeasure(Params): - """ - Mixin for param distanceMeasure: the distance measure. Supported options: 'euclidean' and 'cosine'. - """ - - distanceMeasure = Param(Params._dummy(), "distanceMeasure", "the distance measure. Supported options: 'euclidean' and 'cosine'.", typeConverter=TypeConverters.toString) - - def __init__(self): - super(HasDistanceMeasure, self).__init__() - self._setDefault(distanceMeasure='euclidean') - - def setDistanceMeasure(self, value): - """ - Sets the value of :py:attr:`distanceMeasure`. - """ - return self._set(distanceMeasure=value) - - def getDistanceMeasure(self): - """ - Gets the value of distanceMeasure or its default value. - """ - return self.getOrDefault(self.distanceMeasure) - diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 98f436135184..78cb4a670355 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -650,19 +650,20 @@ def getFeatureSubsetStrategy(self): return self.getOrDefault(self.featureSubsetStrategy) -class TreeRegressorParams(Params): +class HasVarianceImpurity(Params): """ Private class to track supported impurity measures. """ supportedImpurities = ["variance"] + impurity = Param(Params._dummy(), "impurity", "Criterion used for information gain calculation (case-insensitive). " + "Supported options: " + ", ".join(supportedImpurities), typeConverter=TypeConverters.toString) def __init__(self): - super(TreeRegressorParams, self).__init__() + super(HasVarianceImpurity, self).__init__() @since("1.4.0") def setImpurity(self, value): @@ -679,6 +680,10 @@ def getImpurity(self): return self.getOrDefault(self.impurity) +class TreeRegressorParams(HasVarianceImpurity): + pass + + class RandomForestParams(TreeEnsembleParams): """ Private class to track supported random forest parameters. @@ -705,12 +710,52 @@ def getNumTrees(self): return self.getOrDefault(self.numTrees) -class GBTParams(TreeEnsembleParams): +class GBTParams(TreeEnsembleParams, HasMaxIter, HasStepSize, HasValidationIndicatorCol): """ Private class to track supported GBT params. """ + + stepSize = Param(Params._dummy(), "stepSize", + "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " + + "the contribution of each estimator.", + typeConverter=TypeConverters.toFloat) + + validationTol = Param(Params._dummy(), "validationTol", + "Threshold for stopping early when fit with validation is used. " + + "If the error rate on the validation input changes by less than the " + + "validationTol, then learning will stop early (before `maxIter`). 
" + + "This parameter is ignored when fit without validation is used.", + typeConverter=TypeConverters.toFloat) + + @since("3.0.0") + def getValidationTol(self): + """ + Gets the value of validationTol or its default value. + """ + return self.getOrDefault(self.validationTol) + + +class GBTRegressorParams(GBTParams, TreeRegressorParams): + """ + Private class to track supported GBTRegressor params. + + .. versionadded:: 3.0.0 + """ + supportedLossTypes = ["squared", "absolute"] + lossType = Param(Params._dummy(), "lossType", + "Loss function which GBT tries to minimize (case-insensitive). " + + "Supported options: " + ", ".join(supportedLossTypes), + typeConverter=TypeConverters.toString) + + @since("1.4.0") + def getLossType(self): + """ + Gets the value of lossType or its default value. + """ + return self.getOrDefault(self.lossType) + @inherit_doc class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, @@ -1030,9 +1075,9 @@ def featureImportances(self): @inherit_doc -class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, - GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable, - JavaMLReadable, TreeRegressorParams): +class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, + GBTRegressorParams, HasCheckpointInterval, HasSeed, JavaMLWritable, + JavaMLReadable): """ `Gradient-Boosted Trees (GBTs) `_ learning algorithm for regression. @@ -1079,39 +1124,36 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, ... ["label", "features"]) >>> model.evaluateEachIteration(validation, "squared") [0.0, 0.0, 0.0, 0.0, 0.0] + >>> gbt = gbt.setValidationIndicatorCol("validationIndicator") + >>> gbt.getValidationIndicatorCol() + 'validationIndicator' + >>> gbt.getValidationTol() + 0.01 .. versionadded:: 1.4.0 """ - lossType = Param(Params._dummy(), "lossType", - "Loss function which GBT tries to minimize (case-insensitive). " + - "Supported options: " + ", ".join(GBTParams.supportedLossTypes), - typeConverter=TypeConverters.toString) - - stepSize = Param(Params._dummy(), "stepSize", - "Step size (a.k.a. 
learning rate) in interval (0, 1] for shrinking " + - "the contribution of each estimator.", - typeConverter=TypeConverters.toFloat) - @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, - impurity="variance", featureSubsetStrategy="all"): + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, + validationIndicatorCol=None): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \ checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \ - impurity="variance", featureSubsetStrategy="all") + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, \ + validationIndicatorCol=None) """ super(GBTRegressor, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid) self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, - impurity="variance", featureSubsetStrategy="all") + impurity="variance", featureSubsetStrategy="all", validationTol=0.01) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -1121,13 +1163,15 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, - impuriy="variance", featureSubsetStrategy="all"): + impuriy="variance", featureSubsetStrategy="all", validationTol=0.01, + validationIndicatorCol=None): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \ checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \ - impurity="variance", featureSubsetStrategy="all") + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, \ + validationIndicatorCol=None) Sets params for Gradient Boosted Tree Regression. """ kwargs = self._input_kwargs @@ -1143,13 +1187,6 @@ def setLossType(self, value): """ return self._set(lossType=value) - @since("1.4.0") - def getLossType(self): - """ - Gets the value of lossType or its default value. - """ - return self.getOrDefault(self.lossType) - @since("2.4.0") def setFeatureSubsetStrategy(self, value): """ @@ -1157,6 +1194,13 @@ def setFeatureSubsetStrategy(self, value): """ return self._set(featureSubsetStrategy=value) + @since("3.0.0") + def setValidationIndicatorCol(self, value): + """ + Sets the value of :py:attr:`validationIndicatorCol`. 
+ """ + return self._set(validationIndicatorCol=value) + class GBTRegressionModel(TreeEnsembleModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable): """ From 9b1f6c8bab5401258c653d4e2efb50e97c6d282f Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Fri, 7 Dec 2018 13:58:02 -0800 Subject: [PATCH 0090/1072] [SPARK-26304][SS] Add default value to spark.kafka.sasl.kerberos.service.name parameter ## What changes were proposed in this pull request? spark.kafka.sasl.kerberos.service.name is an optional parameter but most of the time value `kafka` has to be set. As I've written in the jira the following reasoning is behind: * Kafka's configuration guide suggest the same value: https://kafka.apache.org/documentation/#security_sasl_kerberos_brokerconfig * It would be easier for spark users by providing less configuration * Other streaming engines are doing the same In this PR I've changed the parameter from optional to `WithDefault` and set `kafka` as default value. ## How was this patch tested? Available unit tests + on cluster. Closes #23254 from gaborgsomogyi/SPARK-26304. Authored-by: Gabor Somogyi Signed-off-by: Marcelo Vanzin --- .../deploy/security/KafkaTokenUtil.scala | 7 ++---- .../apache/spark/internal/config/Kafka.scala | 2 +- .../deploy/security/KafkaTokenUtilSuite.scala | 24 ------------------- .../sql/kafka010/KafkaSecurityHelper.scala | 5 +--- .../kafka010/KafkaSecurityHelperSuite.scala | 15 ------------ 5 files changed, 4 insertions(+), 49 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala b/core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala index c890cee59ffe..aec0f72feb3c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/KafkaTokenUtil.scala @@ -143,14 +143,11 @@ private[spark] object KafkaTokenUtil extends Logging { } private[security] def getKeytabJaasParams(sparkConf: SparkConf): String = { - val serviceName = sparkConf.get(Kafka.KERBEROS_SERVICE_NAME) - require(serviceName.nonEmpty, "Kerberos service name must be defined") - val params = s""" |${getKrb5LoginModuleName} required | useKeyTab=true - | serviceName="${serviceName.get}" + | serviceName="${sparkConf.get(Kafka.KERBEROS_SERVICE_NAME)}" | keyTab="${sparkConf.get(KEYTAB).get}" | principal="${sparkConf.get(PRINCIPAL).get}"; """.stripMargin.replace("\n", "") @@ -166,7 +163,7 @@ private[spark] object KafkaTokenUtil extends Logging { s""" |${getKrb5LoginModuleName} required | useTicketCache=true - | serviceName="${serviceName.get}"; + | serviceName="${sparkConf.get(Kafka.KERBEROS_SERVICE_NAME)}"; """.stripMargin.replace("\n", "") logDebug(s"Krb ticket cache JAAS params: $params") params diff --git a/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala b/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala index 85d74c27142a..064fc93cb8ed 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala @@ -40,7 +40,7 @@ private[spark] object Kafka { "Kafka's JAAS config or in Kafka's config. For further details please see kafka " + "documentation. 
Only used to obtain delegation token.") .stringConf - .createOptional + .createWithDefault("kafka") val TRUSTSTORE_LOCATION = ConfigBuilder("spark.kafka.ssl.truststore.location") diff --git a/core/src/test/scala/org/apache/spark/deploy/security/KafkaTokenUtilSuite.scala b/core/src/test/scala/org/apache/spark/deploy/security/KafkaTokenUtilSuite.scala index 682bebde916f..18aa537b3a51 100644 --- a/core/src/test/scala/org/apache/spark/deploy/security/KafkaTokenUtilSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/security/KafkaTokenUtilSuite.scala @@ -36,7 +36,6 @@ class KafkaTokenUtilSuite extends SparkFunSuite with BeforeAndAfterEach { private val keyStorePassword = "keyStoreSecret" private val keyPassword = "keySecret" private val keytab = "/path/to/keytab" - private val kerberosServiceName = "kafka" private val principal = "user@domain.com" private var sparkConf: SparkConf = null @@ -96,7 +95,6 @@ class KafkaTokenUtilSuite extends SparkFunSuite with BeforeAndAfterEach { sparkConf.set(Kafka.KEYSTORE_LOCATION, keyStoreLocation) sparkConf.set(Kafka.KEYSTORE_PASSWORD, keyStorePassword) sparkConf.set(Kafka.KEY_PASSWORD, keyPassword) - sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) @@ -119,7 +117,6 @@ class KafkaTokenUtilSuite extends SparkFunSuite with BeforeAndAfterEach { sparkConf.set(Kafka.KEYSTORE_LOCATION, keyStoreLocation) sparkConf.set(Kafka.KEYSTORE_PASSWORD, keyStorePassword) sparkConf.set(Kafka.KEY_PASSWORD, keyPassword) - sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) @@ -143,7 +140,6 @@ class KafkaTokenUtilSuite extends SparkFunSuite with BeforeAndAfterEach { sparkConf.set(Kafka.KEYSTORE_LOCATION, keyStoreLocation) sparkConf.set(Kafka.KEYSTORE_PASSWORD, keyStorePassword) sparkConf.set(Kafka.KEY_PASSWORD, keyPassword) - sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) @@ -177,7 +173,6 @@ class KafkaTokenUtilSuite extends SparkFunSuite with BeforeAndAfterEach { sparkConf.set(Kafka.BOOTSTRAP_SERVERS, bootStrapServers) sparkConf.set(Kafka.SECURITY_PROTOCOL, SASL_SSL.name) sparkConf.set(KEYTAB, keytab) - sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) sparkConf.set(PRINCIPAL, principal) val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) @@ -195,7 +190,6 @@ class KafkaTokenUtilSuite extends SparkFunSuite with BeforeAndAfterEach { test("createAdminClientProperties without keytab should set ticket cache dynamic jaas config") { sparkConf.set(Kafka.BOOTSTRAP_SERVERS, bootStrapServers) sparkConf.set(Kafka.SECURITY_PROTOCOL, SASL_SSL.name) - sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) val adminClientProperties = KafkaTokenUtil.createAdminClientProperties(sparkConf) @@ -218,22 +212,4 @@ class KafkaTokenUtilSuite extends SparkFunSuite with BeforeAndAfterEach { assert(KafkaTokenUtil.isGlobalJaasConfigurationProvided) } - - test("getKeytabJaasParams with keytab no service should throw exception") { - sparkConf.set(KEYTAB, keytab) - - val thrown = intercept[IllegalArgumentException] { - KafkaTokenUtil.getKeytabJaasParams(sparkConf) - } - - assert(thrown.getMessage contains "Kerberos service name must be defined") - } - - test("getTicketCacheJaasParams without service should throw exception") { - val thrown = 
intercept[IllegalArgumentException] { - KafkaTokenUtil.getTicketCacheJaasParams(sparkConf) - } - - assert(thrown.getMessage contains "Kerberos service name must be defined") - } } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelper.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelper.scala index 74d5ef9c05f1..7215295b1009 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelper.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelper.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.kafka010 import org.apache.hadoop.security.UserGroupInformation -import org.apache.hadoop.security.token.{Token, TokenIdentifier} import org.apache.kafka.common.security.scram.ScramLoginModule import org.apache.spark.SparkConf @@ -35,8 +34,6 @@ private[kafka010] object KafkaSecurityHelper extends Logging { def getTokenJaasParams(sparkConf: SparkConf): String = { val token = UserGroupInformation.getCurrentUser().getCredentials.getToken( KafkaTokenUtil.TOKEN_SERVICE) - val serviceName = sparkConf.get(Kafka.KERBEROS_SERVICE_NAME) - require(serviceName.isDefined, "Kerberos service name must be defined") val username = new String(token.getIdentifier) val password = new String(token.getPassword) @@ -45,7 +42,7 @@ private[kafka010] object KafkaSecurityHelper extends Logging { s""" |$loginModuleName required | tokenauth=true - | serviceName="${serviceName.get}" + | serviceName="${sparkConf.get(Kafka.KERBEROS_SERVICE_NAME)}" | username="$username" | password="$password"; """.stripMargin.replace("\n", "") diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala index 772fe4614bad..fd9dee390d18 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala @@ -26,12 +26,8 @@ import org.scalatest.BeforeAndAfterEach import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.security.KafkaTokenUtil import org.apache.spark.deploy.security.KafkaTokenUtil.KafkaDelegationTokenIdentifier -import org.apache.spark.internal.config._ class KafkaSecurityHelperSuite extends SparkFunSuite with BeforeAndAfterEach { - private val keytab = "/path/to/keytab" - private val kerberosServiceName = "kafka" - private val principal = "user@domain.com" private val tokenId = "tokenId" + UUID.randomUUID().toString private val tokenPassword = "tokenPassword" + UUID.randomUUID().toString @@ -76,19 +72,8 @@ class KafkaSecurityHelperSuite extends SparkFunSuite with BeforeAndAfterEach { assert(KafkaSecurityHelper.isTokenAvailable()) } - test("getTokenJaasParams with token no service should throw exception") { - addTokenToUGI() - - val thrown = intercept[IllegalArgumentException] { - KafkaSecurityHelper.getTokenJaasParams(sparkConf) - } - - assert(thrown.getMessage contains "Kerberos service name must be defined") - } - test("getTokenJaasParams with token should return scram module") { addTokenToUGI() - sparkConf.set(Kafka.KERBEROS_SERVICE_NAME, kerberosServiceName) val jaasParams = KafkaSecurityHelper.getTokenJaasParams(sparkConf) From 2ea9792fdeb07be19d63e7625cfc483e062a1d9c Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sat, 8 Dec 2018 
05:59:53 -0600 Subject: [PATCH 0091/1072] [SPARK-26266][BUILD] Update to Scala 2.12.8 ## What changes were proposed in this pull request? Update to Scala 2.12.8 ## How was this patch tested? Existing tests. Closes #23218 from srowen/SPARK-26266. Authored-by: Sean Owen Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2.7 | 6 +++--- dev/deps/spark-deps-hadoop-3.1 | 6 +++--- docs/_config.yml | 2 +- pom.xml | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index d250d5205586..71423af0789c 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -171,10 +171,10 @@ parquet-jackson-1.10.0.jar protobuf-java-2.5.0.jar py4j-0.10.8.1.jar pyrolite-4.13.jar -scala-compiler-2.12.7.jar -scala-library-2.12.7.jar +scala-compiler-2.12.8.jar +scala-library-2.12.8.jar scala-parser-combinators_2.12-1.1.0.jar -scala-reflect-2.12.7.jar +scala-reflect-2.12.8.jar scala-xml_2.12-1.0.5.jar shapeless_2.12-2.3.2.jar slf4j-api-1.7.16.jar diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1 index 347503ace557..93eafef04533 100644 --- a/dev/deps/spark-deps-hadoop-3.1 +++ b/dev/deps/spark-deps-hadoop-3.1 @@ -189,10 +189,10 @@ protobuf-java-2.5.0.jar py4j-0.10.8.1.jar pyrolite-4.13.jar re2j-1.1.jar -scala-compiler-2.12.7.jar -scala-library-2.12.7.jar +scala-compiler-2.12.8.jar +scala-library-2.12.8.jar scala-parser-combinators_2.12-1.1.0.jar -scala-reflect-2.12.7.jar +scala-reflect-2.12.8.jar scala-xml_2.12-1.0.5.jar shapeless_2.12-2.3.2.jar slf4j-api-1.7.16.jar diff --git a/docs/_config.yml b/docs/_config.yml index 649d18bf72b5..146c90fcff6e 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -17,7 +17,7 @@ include: SPARK_VERSION: 3.0.0-SNAPSHOT SPARK_VERSION_SHORT: 3.0.0 SCALA_BINARY_VERSION: "2.12" -SCALA_VERSION: "2.12.7" +SCALA_VERSION: "2.12.8" MESOS_VERSION: 1.0.0 SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK SPARK_GITHUB_URL: https://github.com/apache/spark diff --git a/pom.xml b/pom.xml index 15e5c1829040..310d7de95512 100644 --- a/pom.xml +++ b/pom.xml @@ -156,7 +156,7 @@ 3.4.1 3.2.2 - 2.12.7 + 2.12.8 2.12 --diff --test From 678e1aca6901944c119d2ec56169d4e69fce66de Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Sat, 8 Dec 2018 22:23:50 +0800 Subject: [PATCH 0092/1072] [SPARK-24207][R] follow-up PR for SPARK-24207 to fix code style problems ## What changes were proposed in this pull request? follow-up PR for SPARK-24207 to fix code style problems Closes #23256 from huaxingao/spark-24207-cnt. 
Authored-by: Huaxin Gao Signed-off-by: Hyukjin Kwon --- R/pkg/R/mllib_fpm.R | 7 +++-- R/pkg/tests/fulltests/test_mllib_fpm.R | 29 ++++++++++--------- R/pkg/vignettes/sparkr-vignettes.Rmd | 7 +++-- examples/src/main/r/ml/prefixSpan.R | 9 +++--- .../spark/examples/ml/FPGrowthExample.scala | 3 -- .../spark/examples/ml/PrefixSpanExample.scala | 3 -- 6 files changed, 28 insertions(+), 30 deletions(-) diff --git a/R/pkg/R/mllib_fpm.R b/R/pkg/R/mllib_fpm.R index ac37580c6b37..c248e9ec9be9 100644 --- a/R/pkg/R/mllib_fpm.R +++ b/R/pkg/R/mllib_fpm.R @@ -190,9 +190,10 @@ setMethod("write.ml", signature(object = "FPGrowthModel", path = "character"), #' @examples #' \dontrun{ #' df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))), -#' list(list(list(1L), list(3L, 2L), list(1L, 2L))), -#' list(list(list(1L, 2L), list(5L))), -#' list(list(list(6L)))), schema = c("sequence")) +#' list(list(list(1L), list(3L, 2L), list(1L, 2L))), +#' list(list(list(1L, 2L), list(5L))), +#' list(list(list(6L)))), +#' schema = c("sequence")) #' frequency <- spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L, #' maxLocalProjDBSize = 32000000L) #' showDF(frequency) diff --git a/R/pkg/tests/fulltests/test_mllib_fpm.R b/R/pkg/tests/fulltests/test_mllib_fpm.R index daf9ff97a821..bc1e17538d41 100644 --- a/R/pkg/tests/fulltests/test_mllib_fpm.R +++ b/R/pkg/tests/fulltests/test_mllib_fpm.R @@ -84,19 +84,20 @@ test_that("spark.fpGrowth", { }) test_that("spark.prefixSpan", { - df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))), - list(list(list(1L), list(3L, 2L), list(1L, 2L))), - list(list(list(1L, 2L), list(5L))), - list(list(list(6L)))), schema = c("sequence")) - result1 <- spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L, - maxLocalProjDBSize = 32000000L) - - expected_result <- createDataFrame(list(list(list(list(1L)), 3L), - list(list(list(3L)), 2L), - list(list(list(2L)), 3L), - list(list(list(1L, 2L)), 3L), - list(list(list(1L), list(3L)), 2L)), - schema = c("sequence", "freq")) - }) + df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))), + list(list(list(1L), list(3L, 2L), list(1L, 2L))), + list(list(list(1L, 2L), list(5L))), + list(list(list(6L)))), + schema = c("sequence")) + result <- spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L, + maxLocalProjDBSize = 32000000L) + + expected_result <- createDataFrame(list(list(list(list(1L)), 3L), list(list(list(3L)), 2L), + list(list(list(2L)), 3L), list(list(list(1L, 2L)), 3L), + list(list(list(1L), list(3L)), 2L)), + schema = c("sequence", "freq")) + + expect_equivalent(expected_result, result) +}) sparkR.session.stop() diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index f80b45b4f36a..1c6a03c4b9bc 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -1019,9 +1019,10 @@ head(predict(fpm, df)) ```{r} df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))), - list(list(list(1L), list(3L, 2L), list(1L, 2L))), - list(list(list(1L, 2L), list(5L))), - list(list(list(6L)))), schema = c("sequence")) + list(list(list(1L), list(3L, 2L), list(1L, 2L))), + list(list(list(1L, 2L), list(5L))), + list(list(list(6L)))), + schema = c("sequence")) head(spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L)) ``` diff --git a/examples/src/main/r/ml/prefixSpan.R b/examples/src/main/r/ml/prefixSpan.R index 9b70573ffb78..02908aeb0296 100644 --- 
a/examples/src/main/r/ml/prefixSpan.R +++ b/examples/src/main/r/ml/prefixSpan.R @@ -28,9 +28,10 @@ sparkR.session(appName = "SparkR-ML-prefixSpan-example") # Load training data df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))), - list(list(list(1L), list(3L, 2L), list(1L, 2L))), - list(list(list(1L, 2L), list(5L))), - list(list(list(6L)))), schema = c("sequence")) + list(list(list(1L), list(3L, 2L), list(1L, 2L))), + list(list(list(1L, 2L), list(5L))), + list(list(list(6L)))), + schema = c("sequence")) # Finding frequent sequential patterns frequency <- spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L, @@ -39,4 +40,4 @@ showDF(frequency) # $example off$ -sparkR.session.stop() \ No newline at end of file +sparkR.session.stop() diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala index 59110d70de55..bece0d96c030 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala @@ -17,8 +17,6 @@ package org.apache.spark.examples.ml -// scalastyle:off println - // $example on$ import org.apache.spark.ml.fpm.FPGrowth // $example off$ @@ -64,4 +62,3 @@ object FPGrowthExample { spark.stop() } } -// scalastyle:on println diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala index 0a2d31097a02..b4e0811c506b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala @@ -17,8 +17,6 @@ package org.apache.spark.examples.ml -// scalastyle:off println - // $example on$ import org.apache.spark.ml.fpm.PrefixSpan // $example off$ @@ -59,4 +57,3 @@ object PrefixSpanExample { spark.stop() } } -// scalastyle:on println From bdf32847b1ffcb3aa4d0bef058f86e65656e99fb Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 8 Dec 2018 11:18:09 -0800 Subject: [PATCH 0093/1072] [SPARK-26021][SQL][FOLLOWUP] only deal with NaN and -0.0 in UnsafeWriter ## What changes were proposed in this pull request? A followup of https://github.com/apache/spark/pull/23043 There are 4 places we need to deal with NaN and -0.0: 1. comparison expressions. `-0.0` and `0.0` should be treated as same. Different NaNs should be treated as same. 2. Join keys. `-0.0` and `0.0` should be treated as same. Different NaNs should be treated as same. 3. grouping keys. `-0.0` and `0.0` should be assigned to the same group. Different NaNs should be assigned to the same group. 4. window partition keys. `-0.0` and `0.0` should be treated as same. Different NaNs should be treated as same. The case 1 is OK. Our comparison already handles NaN and -0.0, and for struct/array/map, we will recursively compare the fields/elements. Case 2, 3 and 4 are problematic, as they compare `UnsafeRow` binary directly, and different NaNs have different binary representation, and the same thing happens for -0.0 and 0.0. To fix it, a simple solution is: normalize float/double when building unsafe data (`UnsafeRow`, `UnsafeArrayData`, `UnsafeMapData`). Then we don't need to worry about it anymore. Following this direction, this PR moves the handling of NaN and -0.0 from `Platform` to `UnsafeWriter`, so that places like `UnsafeRow.setFloat` will not handle them, which reduces the perf overhead. 
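As a quick illustration (this standalone snippet is not part of the patch; the object and method names are made up for the example), the normalization rule can be sketched in isolation like this:

```
// Standalone sketch of the normalization rule applied in UnsafeWriter:
// collapse every NaN bit pattern to the canonical NaN and rewrite -0.0 as 0.0,
// so that byte-wise comparison of the written unsafe data behaves like value
// comparison for grouping/join/window partition keys.
object FloatNormalizationSketch {
  def normalize(value: Float): Float = {
    if (java.lang.Float.isNaN(value)) Float.NaN   // one canonical NaN payload
    else if (value == -0.0f) 0.0f                 // -0.0 equals 0.0, but the bits differ
    else value
  }

  def main(args: Array[String]): Unit = {
    import java.lang.Float.floatToRawIntBits
    // Semantically equal yet binary-distinct: this is what breaks raw UnsafeRow comparison.
    assert(-0.0f == 0.0f)
    assert(floatToRawIntBits(-0.0f) != floatToRawIntBits(0.0f))
    // After normalization the binary representations agree.
    assert(floatToRawIntBits(normalize(-0.0f)) == floatToRawIntBits(normalize(0.0f)))
    assert(floatToRawIntBits(normalize(0.0f / 0.0f)) == floatToRawIntBits(normalize(Float.NaN)))
  }
}
```

The actual change applies the same rule for floats and doubles inside `UnsafeWriter.writeFloat`/`writeDouble`, as shown in the diff below.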
It's also easier to add comments explaining why we do it in `UnsafeWriter`. ## How was this patch tested? existing tests Closes #23239 from cloud-fan/minor. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/unsafe/Platform.java | 10 ------ .../spark/unsafe/PlatformUtilSuite.java | 18 ---------- .../expressions/codegen/UnsafeWriter.java | 35 +++++++++++++++++++ .../codegen/UnsafeRowWriterSuite.scala | 20 +++++++++++ .../apache/spark/sql/DataFrameJoinSuite.scala | 12 +++++++ .../sql/DataFrameWindowFunctionsSuite.scala | 14 ++++++++ 6 files changed, 81 insertions(+), 28 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java index 4563efcfcf47..076b693f81c8 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java @@ -174,11 +174,6 @@ public static float getFloat(Object object, long offset) { } public static void putFloat(Object object, long offset, float value) { - if (Float.isNaN(value)) { - value = Float.NaN; - } else if (value == -0.0f) { - value = 0.0f; - } _UNSAFE.putFloat(object, offset, value); } @@ -187,11 +182,6 @@ public static double getDouble(Object object, long offset) { } public static void putDouble(Object object, long offset, double value) { - if (Double.isNaN(value)) { - value = Double.NaN; - } else if (value == -0.0d) { - value = 0.0d; - } _UNSAFE.putDouble(object, offset, value); } diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java index 2474081dad5c..3ad9ac7b4de9 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java @@ -157,22 +157,4 @@ public void heapMemoryReuse() { Assert.assertEquals(onheap4.size(), 1024 * 1024 + 7); Assert.assertEquals(obj3, onheap4.getBaseObject()); } - - @Test - // SPARK-26021 - public void writeMinusZeroIsReplacedWithZero() { - byte[] doubleBytes = new byte[Double.BYTES]; - byte[] floatBytes = new byte[Float.BYTES]; - Platform.putDouble(doubleBytes, Platform.BYTE_ARRAY_OFFSET, -0.0d); - Platform.putFloat(floatBytes, Platform.BYTE_ARRAY_OFFSET, -0.0f); - - byte[] doubleBytes2 = new byte[Double.BYTES]; - byte[] floatBytes2 = new byte[Float.BYTES]; - Platform.putDouble(doubleBytes, Platform.BYTE_ARRAY_OFFSET, 0.0d); - Platform.putFloat(floatBytes, Platform.BYTE_ARRAY_OFFSET, 0.0f); - - // Make sure the bytes we write from 0.0 and -0.0 are same. - Assert.assertArrayEquals(doubleBytes, doubleBytes2); - Assert.assertArrayEquals(floatBytes, floatBytes2); - } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java index 95263a0da95a..7553ab8cf700 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java @@ -198,11 +198,46 @@ protected final void writeLong(long offset, long value) { Platform.putLong(getBuffer(), offset, value); } + // We need to take care of NaN and -0.0 in several places: + // 1. When compare values, different NaNs should be treated as same, `-0.0` and `0.0` should be + // treated as same. 
+ // 2. In GROUP BY, different NaNs should belong to the same group, -0.0 and 0.0 should belong + // to the same group. + // 3. As join keys, different NaNs should be treated as same, `-0.0` and `0.0` should be + // treated as same. + // 4. As window partition keys, different NaNs should be treated as same, `-0.0` and `0.0` + // should be treated as same. + // + // Case 1 is fine, as we handle NaN and -0.0 well during comparison. For complex types, we + // recursively compare the fields/elements, so it's also fine. + // + // Case 2, 3 and 4 are problematic, as they compare `UnsafeRow` binary directly, and different + // NaNs have different binary representation, and the same thing happens for -0.0 and 0.0. + // + // Here we normalize NaN and -0.0, so that `UnsafeProjection` will normalize them when writing + // float/double columns and nested fields to `UnsafeRow`. + // + // Note that, we must do this for all the `UnsafeProjection`s, not only the ones that extract + // join/grouping/window partition keys. `UnsafeProjection` copies unsafe data directly for complex + // types, so nested float/double may not be normalized. We need to make sure that all the unsafe + // data(`UnsafeRow`, `UnsafeArrayData`, `UnsafeMapData`) will have flat/double normalized during + // creation. protected final void writeFloat(long offset, float value) { + if (Float.isNaN(value)) { + value = Float.NaN; + } else if (value == -0.0f) { + value = 0.0f; + } Platform.putFloat(getBuffer(), offset, value); } + // See comments for `writeFloat`. protected final void writeDouble(long offset, double value) { + if (Double.isNaN(value)) { + value = Double.NaN; + } else if (value == -0.0d) { + value = 0.0d; + } Platform.putDouble(getBuffer(), offset, value); } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala index fb651b76fc16..22e1fa6dfed4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala @@ -50,4 +50,24 @@ class UnsafeRowWriterSuite extends SparkFunSuite { assert(res1 == res2) } + test("SPARK-26021: normalize float/double NaN and -0.0") { + val unsafeRowWriter1 = new UnsafeRowWriter(4) + unsafeRowWriter1.resetRowWriter() + unsafeRowWriter1.write(0, Float.NaN) + unsafeRowWriter1.write(1, Double.NaN) + unsafeRowWriter1.write(2, 0.0f) + unsafeRowWriter1.write(3, 0.0) + val res1 = unsafeRowWriter1.getRow + + val unsafeRowWriter2 = new UnsafeRowWriter(4) + unsafeRowWriter2.resetRowWriter() + unsafeRowWriter2.write(0, 0.0f/0.0f) + unsafeRowWriter2.write(1, 0.0/0.0) + unsafeRowWriter2.write(2, -0.0f) + unsafeRowWriter2.write(3, -0.0) + val res2 = unsafeRowWriter2.getRow + + // The two rows should be the equal + assert(res1 == res2) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index e6b30f9956da..c9f41ab1c017 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -295,4 +295,16 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext { df.join(df, df("id") <=> df("id")).queryExecution.optimizedPlan } } + + test("NaN and -0.0 in join keys") { + val df1 = 
Seq(Float.NaN -> Double.NaN, 0.0f -> 0.0, -0.0f -> -0.0).toDF("f", "d") + val df2 = Seq(Float.NaN -> Double.NaN, 0.0f -> 0.0, -0.0f -> -0.0).toDF("f", "d") + val joined = df1.join(df2, Seq("f", "d")) + checkAnswer(joined, Seq( + Row(Float.NaN, Double.NaN), + Row(0.0f, 0.0), + Row(0.0f, 0.0), + Row(0.0f, 0.0), + Row(0.0f, 0.0))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 78277d7dcf75..9a5d5a9966ab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -681,4 +681,18 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext { Row("S2", "P2", 300, 300, 500))) } + + test("NaN and -0.0 in window partition keys") { + val df = Seq( + (Float.NaN, Double.NaN, 1), + (0.0f/0.0f, 0.0/0.0, 1), + (0.0f, 0.0, 1), + (-0.0f, -0.0, 1)).toDF("f", "d", "i") + val result = df.select($"f", count("i").over(Window.partitionBy("f", "d"))) + checkAnswer(result, Seq( + Row(Float.NaN, 2), + Row(Float.NaN, 2), + Row(0.0f, 2), + Row(0.0f, 2))) + } } From 55276d3a26474e7479941db3e9c065d86344885f Mon Sep 17 00:00:00 2001 From: seancxmao Date: Sat, 8 Dec 2018 17:53:12 -0800 Subject: [PATCH 0094/1072] [SPARK-25132][SQL][FOLLOWUP][DOC] Add migration doc for case-insensitive field resolution when reading from Parquet ## What changes were proposed in this pull request? #22148 introduces a behavior change. According to discussion at #22184, this PR updates migration guide when upgrade from Spark 2.3 to 2.4. ## How was this patch tested? N/A Closes #23238 from seancxmao/SPARK-25132-doc-2.4. Authored-by: seancxmao Signed-off-by: Dongjoon Hyun --- docs/sql-migration-guide-upgrade.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 67c30fb941ec..f6458a9b2730 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -145,6 +145,8 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.3 and earlier, HAVING without GROUP BY is treated as WHERE. This means, `SELECT 1 FROM range(10) HAVING true` is executed as `SELECT 1 FROM range(10) WHERE true` and returns 10 rows. This violates SQL standard, and has been fixed in Spark 2.4. Since Spark 2.4, HAVING without GROUP BY is treated as a global aggregate, which means `SELECT 1 FROM range(10) HAVING true` will return only one row. To restore the previous behavior, set `spark.sql.legacy.parser.havingWithoutGroupByAsWhere` to `true`. + - In version 2.3 and earlier, when reading from a Parquet data source table, Spark always returns null for any column whose column names in Hive metastore schema and Parquet schema are in different letter cases, no matter whether `spark.sql.caseSensitive` is set to `true` or `false`. Since 2.4, when `spark.sql.caseSensitive` is set to `false`, Spark does case insensitive column name resolution between Hive metastore schema and Parquet schema, so even column names are in different letter cases, Spark returns corresponding column values. An exception is thrown if there is ambiguity, i.e. more than one Parquet column is matched. This change also applies to Parquet Hive tables when `spark.sql.hive.convertMetastoreParquet` is set to `true`. 
+ ## Upgrading From Spark SQL 2.3.0 to 2.3.1 and above - As of version 2.3.1 Arrow functionality, including `pandas_udf` and `toPandas()`/`createDataFrame()` with `spark.sql.execution.arrow.enabled` set to `True`, has been marked as experimental. These are still evolving and not currently recommended for use in production. From 877f82cb30bc4edef770b36e1e394a887ab535c6 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Sun, 9 Dec 2018 10:49:15 +0800 Subject: [PATCH 0095/1072] [SPARK-26193][SQL] Implement shuffle write metrics in SQL ## What changes were proposed in this pull request? 1. Implement `SQLShuffleWriteMetricsReporter` on the SQL side as the customized `ShuffleWriteMetricsReporter`. 2. Add shuffle write metrics to `ShuffleExchangeExec`, and use these metrics to create corresponding `SQLShuffleWriteMetricsReporter` in shuffle dependency. 3. Rework on `ShuffleMapTask` to add new class named `ShuffleWriteProcessor` which control shuffle write process, we use sql shuffle write metrics by customizing a ShuffleWriteProcessor on SQL side. ## How was this patch tested? Add UT in SQLMetricsSuite. Manually test locally, update screen shot to document attached in JIRA. Closes #23207 from xuanyuanking/SPARK-26193. Authored-by: Yuanjian Li Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/Dependency.scala | 6 +- .../spark/scheduler/ShuffleMapTask.scala | 20 +---- .../spark/shuffle/ShuffleWriteProcessor.scala | 74 +++++++++++++++++++ project/MimaExcludes.scala | 3 + .../exchange/ShuffleExchangeExec.scala | 38 ++++++++-- .../apache/spark/sql/execution/limit.scala | 30 ++++++-- .../sql/execution/metric/SQLMetrics.scala | 12 +++ .../metric/SQLShuffleMetricsReporter.scala | 55 ++++++++++++++ .../execution/metric/SQLMetricsSuite.scala | 36 +++++++-- 9 files changed, 234 insertions(+), 40 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala index 9ea6d2fa2fd9..fb051a8c0db8 100644 --- a/core/src/main/scala/org/apache/spark/Dependency.scala +++ b/core/src/main/scala/org/apache/spark/Dependency.scala @@ -22,7 +22,7 @@ import scala.reflect.ClassTag import org.apache.spark.annotation.DeveloperApi import org.apache.spark.rdd.RDD import org.apache.spark.serializer.Serializer -import org.apache.spark.shuffle.ShuffleHandle +import org.apache.spark.shuffle.{ShuffleHandle, ShuffleWriteProcessor} /** * :: DeveloperApi :: @@ -65,6 +65,7 @@ abstract class NarrowDependency[T](_rdd: RDD[T]) extends Dependency[T] { * @param keyOrdering key ordering for RDD's shuffles * @param aggregator map/reduce-side aggregator for RDD's shuffle * @param mapSideCombine whether to perform partial aggregation (also known as map-side combine) + * @param shuffleWriterProcessor the processor to control the write behavior in ShuffleMapTask */ @DeveloperApi class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( @@ -73,7 +74,8 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( val serializer: Serializer = SparkEnv.get.serializer, val keyOrdering: Option[Ordering[K]] = None, val aggregator: Option[Aggregator[K, V, C]] = None, - val mapSideCombine: Boolean = false) + val mapSideCombine: Boolean = false, + val shuffleWriterProcessor: ShuffleWriteProcessor = new ShuffleWriteProcessor) extends Dependency[Product2[K, V]] { if (mapSideCombine) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala 
b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala index 5412717d6198..2a8d1dd995e2 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala @@ -92,25 +92,7 @@ private[spark] class ShuffleMapTask( threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime } else 0L - var writer: ShuffleWriter[Any, Any] = null - try { - val manager = SparkEnv.get.shuffleManager - writer = manager.getWriter[Any, Any]( - dep.shuffleHandle, partitionId, context, context.taskMetrics().shuffleWriteMetrics) - writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) - writer.stop(success = true).get - } catch { - case e: Exception => - try { - if (writer != null) { - writer.stop(success = false) - } - } catch { - case e: Exception => - log.debug("Could not stop writer", e) - } - throw e - } + dep.shuffleWriterProcessor.writeProcess(rdd, dep, partitionId, context, partition) } override def preferredLocations: Seq[TaskLocation] = preferredLocs diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala new file mode 100644 index 000000000000..f5213157a9a8 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle + +import org.apache.spark.{Partition, ShuffleDependency, SparkEnv, TaskContext} +import org.apache.spark.internal.Logging +import org.apache.spark.rdd.RDD +import org.apache.spark.scheduler.MapStatus + +/** + * The interface for customizing shuffle write process. The driver create a ShuffleWriteProcessor + * and put it into [[ShuffleDependency]], and executors use it in each ShuffleMapTask. + */ +private[spark] class ShuffleWriteProcessor extends Serializable with Logging { + + /** + * Create a [[ShuffleWriteMetricsReporter]] from the task context. As the reporter is a + * per-row operator, here need a careful consideration on performance. + */ + protected def createMetricsReporter(context: TaskContext): ShuffleWriteMetricsReporter = { + context.taskMetrics().shuffleWriteMetrics + } + + /** + * The write process for particular partition, it controls the life circle of [[ShuffleWriter]] + * get from [[ShuffleManager]] and triggers rdd compute, finally return the [[MapStatus]] for + * this task. 
+ */ + def writeProcess( + rdd: RDD[_], + dep: ShuffleDependency[_, _, _], + partitionId: Int, + context: TaskContext, + partition: Partition): MapStatus = { + var writer: ShuffleWriter[Any, Any] = null + try { + val manager = SparkEnv.get.shuffleManager + writer = manager.getWriter[Any, Any]( + dep.shuffleHandle, + partitionId, + context, + createMetricsReporter(context)) + writer.write( + rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) + writer.stop(success = true).get + } catch { + case e: Exception => + try { + if (writer != null) { + writer.stop(success = false) + } + } catch { + case e: Exception => + log.debug("Could not stop writer", e) + } + throw e + } + } +} diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 4eeebb805070..b3252d70a80c 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -217,6 +217,9 @@ object MimaExcludes { ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.shuffle.sort.UnsafeShuffleWriter.this"), ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.storage.TimeTrackingOutputStream.this"), + // [SPARK-26139] Implement shuffle write metrics in SQL + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ShuffleDependency.this"), + // Data Source V2 API changes (problem: Problem) => problem match { case MissingClassProblem(cls) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index c9ca395bceaa..0c2020572e72 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -23,6 +23,7 @@ import java.util.function.Supplier import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.serializer.Serializer +import org.apache.spark.shuffle.{ShuffleWriteMetricsReporter, ShuffleWriteProcessor} import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.errors._ @@ -30,7 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, BoundReference, Uns import org.apache.spark.sql.catalyst.expressions.codegen.LazilyGeneratedOrdering import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.metric.{SQLMetrics, SQLShuffleMetricsReporter} +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics, SQLShuffleMetricsReporter, SQLShuffleWriteMetricsReporter} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.util.MutablePair @@ -46,10 +47,13 @@ case class ShuffleExchangeExec( // NOTE: coordinator can be null after serialization/deserialization, // e.g. 
it can be null on the Executor side - + private lazy val writeMetrics = + SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) + private lazy val readMetrics = + SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) override lazy val metrics = Map( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size") - ) ++ SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) + ) ++ readMetrics ++ writeMetrics override def nodeName: String = { val extraInfo = coordinator match { @@ -90,7 +94,11 @@ case class ShuffleExchangeExec( private[exchange] def prepareShuffleDependency() : ShuffleDependency[Int, InternalRow, InternalRow] = { ShuffleExchangeExec.prepareShuffleDependency( - child.execute(), child.output, newPartitioning, serializer) + child.execute(), + child.output, + newPartitioning, + serializer, + writeMetrics) } /** @@ -109,7 +117,7 @@ case class ShuffleExchangeExec( assert(newPartitioning.isInstanceOf[HashPartitioning]) newPartitioning = UnknownPartitioning(indices.length) } - new ShuffledRowRDD(shuffleDependency, metrics, specifiedPartitionStartIndices) + new ShuffledRowRDD(shuffleDependency, readMetrics, specifiedPartitionStartIndices) } /** @@ -204,7 +212,9 @@ object ShuffleExchangeExec { rdd: RDD[InternalRow], outputAttributes: Seq[Attribute], newPartitioning: Partitioning, - serializer: Serializer): ShuffleDependency[Int, InternalRow, InternalRow] = { + serializer: Serializer, + writeMetrics: Map[String, SQLMetric]) + : ShuffleDependency[Int, InternalRow, InternalRow] = { val part: Partitioner = newPartitioning match { case RoundRobinPartitioning(numPartitions) => new HashPartitioner(numPartitions) case HashPartitioning(_, n) => @@ -333,8 +343,22 @@ object ShuffleExchangeExec { new ShuffleDependency[Int, InternalRow, InternalRow]( rddWithPartitionIds, new PartitionIdPassthrough(part.numPartitions), - serializer) + serializer, + shuffleWriterProcessor = createShuffleWriteProcessor(writeMetrics)) dependency } + + /** + * Create a customized [[ShuffleWriteProcessor]] for SQL which wrap the default metrics reporter + * with [[SQLShuffleWriteMetricsReporter]] as new reporter for [[ShuffleWriteProcessor]]. + */ + def createShuffleWriteProcessor(metrics: Map[String, SQLMetric]): ShuffleWriteProcessor = { + new ShuffleWriteProcessor { + override protected def createMetricsReporter( + context: TaskContext): ShuffleWriteMetricsReporter = { + new SQLShuffleWriteMetricsReporter(context.taskMetrics().shuffleWriteMetrics, metrics) + } + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index e9ab7cd138d9..1f2fdde53864 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGe import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec -import org.apache.spark.sql.execution.metric.SQLShuffleMetricsReporter +import org.apache.spark.sql.execution.metric.{SQLShuffleMetricsReporter, SQLShuffleWriteMetricsReporter} /** * Take the first `limit` elements and collect them to a single partition. 
@@ -38,13 +38,21 @@ case class CollectLimitExec(limit: Int, child: SparkPlan) extends UnaryExecNode override def outputPartitioning: Partitioning = SinglePartition override def executeCollect(): Array[InternalRow] = child.executeTake(limit) private val serializer: Serializer = new UnsafeRowSerializer(child.output.size) - override lazy val metrics = SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) + private lazy val writeMetrics = + SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) + private lazy val readMetrics = + SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) + override lazy val metrics = readMetrics ++ writeMetrics protected override def doExecute(): RDD[InternalRow] = { val locallyLimited = child.execute().mapPartitionsInternal(_.take(limit)) val shuffled = new ShuffledRowRDD( ShuffleExchangeExec.prepareShuffleDependency( - locallyLimited, child.output, SinglePartition, serializer), - metrics) + locallyLimited, + child.output, + SinglePartition, + serializer, + writeMetrics), + readMetrics) shuffled.mapPartitionsInternal(_.take(limit)) } } @@ -154,7 +162,11 @@ case class TakeOrderedAndProjectExec( private val serializer: Serializer = new UnsafeRowSerializer(child.output.size) - override lazy val metrics = SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) + private lazy val writeMetrics = + SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) + private lazy val readMetrics = + SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) + override lazy val metrics = readMetrics ++ writeMetrics protected override def doExecute(): RDD[InternalRow] = { val ord = new LazilyGeneratedOrdering(sortOrder, child.output) @@ -165,8 +177,12 @@ case class TakeOrderedAndProjectExec( } val shuffled = new ShuffledRowRDD( ShuffleExchangeExec.prepareShuffleDependency( - localTopK, child.output, SinglePartition, serializer), - metrics) + localTopK, + child.output, + SinglePartition, + serializer, + writeMetrics), + readMetrics) shuffled.mapPartitions { iter => val topK = org.apache.spark.util.collection.Utils.takeOrdered(iter.map(_.copy()), limit)(ord) if (projectList != child.output) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala index cbf707f4a9cf..19809b07508d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.execution.metric import java.text.NumberFormat import java.util.Locale +import scala.concurrent.duration._ + import org.apache.spark.SparkContext import org.apache.spark.scheduler.AccumulableInfo import org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates @@ -78,6 +80,7 @@ object SQLMetrics { private val SUM_METRIC = "sum" private val SIZE_METRIC = "size" private val TIMING_METRIC = "timing" + private val NS_TIMING_METRIC = "nsTiming" private val AVERAGE_METRIC = "average" private val baseForAvgMetric: Int = 10 @@ -121,6 +124,13 @@ object SQLMetrics { acc } + def createNanoTimingMetric(sc: SparkContext, name: String): SQLMetric = { + // Same with createTimingMetric, just normalize the unit of time to millisecond. 
+ val acc = new SQLMetric(NS_TIMING_METRIC, -1) + acc.register(sc, name = Some(s"$name total (min, med, max)"), countFailedValues = false) + acc + } + /** * Create a metric to report the average information (including min, med, max) like * avg hash probe. As average metrics are double values, this kind of metrics should be @@ -163,6 +173,8 @@ object SQLMetrics { Utils.bytesToString } else if (metricsType == TIMING_METRIC) { Utils.msDurationToString + } else if (metricsType == NS_TIMING_METRIC) { + duration => Utils.msDurationToString(duration.nanos.toMillis) } else { throw new IllegalStateException("unexpected metrics type: " + metricsType) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala index 780f0d762229..ff7941e3b3e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.metric import org.apache.spark.SparkContext import org.apache.spark.executor.TempShuffleReadMetrics +import org.apache.spark.shuffle.ShuffleWriteMetricsReporter /** * A shuffle metrics reporter for SQL exchange operators. @@ -95,3 +96,57 @@ private[spark] object SQLShuffleMetricsReporter { FETCH_WAIT_TIME -> SQLMetrics.createTimingMetric(sc, "fetch wait time"), RECORDS_READ -> SQLMetrics.createMetric(sc, "records read")) } + +/** + * A shuffle write metrics reporter for SQL exchange operators. + * @param metricsReporter Other reporter need to be updated in this SQLShuffleWriteMetricsReporter. + * @param metrics Shuffle write metrics in current SparkPlan. + */ +private[spark] class SQLShuffleWriteMetricsReporter( + metricsReporter: ShuffleWriteMetricsReporter, + metrics: Map[String, SQLMetric]) extends ShuffleWriteMetricsReporter { + private[this] val _bytesWritten = + metrics(SQLShuffleWriteMetricsReporter.SHUFFLE_BYTES_WRITTEN) + private[this] val _recordsWritten = + metrics(SQLShuffleWriteMetricsReporter.SHUFFLE_RECORDS_WRITTEN) + private[this] val _writeTime = + metrics(SQLShuffleWriteMetricsReporter.SHUFFLE_WRITE_TIME) + + override private[spark] def incBytesWritten(v: Long): Unit = { + metricsReporter.incBytesWritten(v) + _bytesWritten.add(v) + } + override private[spark] def decRecordsWritten(v: Long): Unit = { + metricsReporter.decBytesWritten(v) + _recordsWritten.set(_recordsWritten.value - v) + } + override private[spark] def incRecordsWritten(v: Long): Unit = { + metricsReporter.incRecordsWritten(v) + _recordsWritten.add(v) + } + override private[spark] def incWriteTime(v: Long): Unit = { + metricsReporter.incWriteTime(v) + _writeTime.add(v) + } + override private[spark] def decBytesWritten(v: Long): Unit = { + metricsReporter.decBytesWritten(v) + _bytesWritten.set(_bytesWritten.value - v) + } +} + +private[spark] object SQLShuffleWriteMetricsReporter { + val SHUFFLE_BYTES_WRITTEN = "shuffleBytesWritten" + val SHUFFLE_RECORDS_WRITTEN = "shuffleRecordsWritten" + val SHUFFLE_WRITE_TIME = "shuffleWriteTime" + + /** + * Create all shuffle write relative metrics and return the Map. 
+ */ + def createShuffleWriteMetrics(sc: SparkContext): Map[String, SQLMetric] = Map( + SHUFFLE_BYTES_WRITTEN -> + SQLMetrics.createSizeMetric(sc, "shuffle bytes written"), + SHUFFLE_RECORDS_WRITTEN -> + SQLMetrics.createMetric(sc, "shuffle records written"), + SHUFFLE_WRITE_TIME -> + SQLMetrics.createNanoTimingMetric(sc, "shuffle write time")) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 0f1d08b6af5d..2251607e76af 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -97,7 +97,8 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared val shuffleExpected1 = Map( "records read" -> 2L, "local blocks fetched" -> 2L, - "remote blocks fetched" -> 0L) + "remote blocks fetched" -> 0L, + "shuffle records written" -> 2L) testSparkPlanMetrics(df, 1, Map( 2L -> (("HashAggregate", expected1(0))), 1L -> (("Exchange", shuffleExpected1)), @@ -114,7 +115,8 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared val shuffleExpected2 = Map( "records read" -> 4L, "local blocks fetched" -> 4L, - "remote blocks fetched" -> 0L) + "remote blocks fetched" -> 0L, + "shuffle records written" -> 4L) testSparkPlanMetrics(df2, 1, Map( 2L -> (("HashAggregate", expected2(0))), 1L -> (("Exchange", shuffleExpected2)), @@ -170,6 +172,11 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared val df = testData2.groupBy().agg(collect_set('a)) // 2 partitions testSparkPlanMetrics(df, 1, Map( 2L -> (("ObjectHashAggregate", Map("number of output rows" -> 2L))), + 1L -> (("Exchange", Map( + "shuffle records written" -> 2L, + "records read" -> 2L, + "local blocks fetched" -> 2L, + "remote blocks fetched" -> 0L))), 0L -> (("ObjectHashAggregate", Map("number of output rows" -> 1L)))) ) @@ -177,6 +184,11 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared val df2 = testData2.groupBy('a).agg(collect_set('a)) testSparkPlanMetrics(df2, 1, Map( 2L -> (("ObjectHashAggregate", Map("number of output rows" -> 4L))), + 1L -> (("Exchange", Map( + "shuffle records written" -> 4L, + "records read" -> 4L, + "local blocks fetched" -> 4L, + "remote blocks fetched" -> 0L))), 0L -> (("ObjectHashAggregate", Map("number of output rows" -> 3L)))) ) } @@ -205,7 +217,8 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared 2L -> (("Exchange", Map( "records read" -> 4L, "local blocks fetched" -> 2L, - "remote blocks fetched" -> 0L)))) + "remote blocks fetched" -> 0L, + "shuffle records written" -> 2L)))) ) } } @@ -299,12 +312,25 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared val df1 = Seq((1, "1"), (2, "2")).toDF("key", "value") val df2 = (1 to 10).map(i => (i, i.toString)).toSeq.toDF("key", "value") // Assume the execution plan is - // ... 
-> ShuffledHashJoin(nodeId = 1) -> Project(nodeId = 0) + // Project(nodeId = 0) + // +- ShuffledHashJoin(nodeId = 1) + // :- Exchange(nodeId = 2) + // : +- Project(nodeId = 3) + // : +- LocalTableScan(nodeId = 4) + // +- Exchange(nodeId = 5) + // +- Project(nodeId = 6) + // +- LocalTableScan(nodeId = 7) val df = df1.join(df2, "key") testSparkPlanMetrics(df, 1, Map( 1L -> (("ShuffledHashJoin", Map( "number of output rows" -> 2L, - "avg hash probe (min, med, max)" -> "\n(1, 1, 1)")))) + "avg hash probe (min, med, max)" -> "\n(1, 1, 1)"))), + 2L -> (("Exchange", Map( + "shuffle records written" -> 2L, + "records read" -> 2L))), + 5L -> (("Exchange", Map( + "shuffle records written" -> 10L, + "records read" -> 10L)))) ) } } From ec506bd30c2ca324c12c9ec811764081c2eb8c42 Mon Sep 17 00:00:00 2001 From: Shahid Date: Sun, 9 Dec 2018 11:44:16 -0600 Subject: [PATCH 0096/1072] [SPARK-26283][CORE] Enable reading from open frames of zstd, when reading zstd compressed eventLog ## What changes were proposed in this pull request? Root cause: Prior to Spark 2.4, when we enable zstd for eventLog compression, the Application UI always throws an exception for an in-progress application when we open it from the history server. But after 2.4 it will display the UI information based on the completed frames in the zstd compressed eventLog, although it doesn't read incomplete frames for an in-progress application. In this PR, we have added 'setContinuous(true)' for reading the input stream from the eventLog, so that it can read from open frames also. (By default 'isContinuous=false' for the zstd input stream, and when we try to read an open frame, it throws a truncated error.) ## How was this patch tested? Test steps: 1) Add the configurations in the spark-defaults.conf (i) spark.eventLog.compress true (ii) spark.io.compression.codec zstd 2) Restart history server 3) bin/spark-shell 4) sc.parallelize(1 to 1000, 1000).count 5) Open app UI from the history server UI **Before fix** ![screenshot from 2018-12-06 00-01-38](https://user-images.githubusercontent.com/23054875/49537340-bfe28b00-f8ee-11e8-9fca-6d42fdc89e1a.png) **After fix:** ![screenshot from 2018-12-06 00-34-39](https://user-images.githubusercontent.com/23054875/49537353-ca9d2000-f8ee-11e8-803d-645897b9153b.png) Closes #23241 from shahidki31/zstdEventLog. Authored-by: Shahid Signed-off-by: Sean Owen --- .../scala/org/apache/spark/io/CompressionCodec.scala | 12 ++++++++++++ .../spark/scheduler/EventLoggingListener.scala | 2 +- .../apache/spark/scheduler/ReplayListenerBus.scala | 2 -- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 0664c5ac752c..c4f4b18769d2 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -43,6 +43,10 @@ trait CompressionCodec { def compressedOutputStream(s: OutputStream): OutputStream def compressedInputStream(s: InputStream): InputStream + + private[spark] def compressedContinuousInputStream(s: InputStream): InputStream = { + compressedInputStream(s) + } } private[spark] object CompressionCodec { @@ -197,4 +201,12 @@ class ZStdCompressionCodec(conf: SparkConf) extends CompressionCodec { // avoid overhead excessive of JNI call while trying to uncompress small amount of data. 
new BufferedInputStream(new ZstdInputStream(s), bufferSize) } + + override def compressedContinuousInputStream(s: InputStream): InputStream = { + // SPARK-26283: Enable reading from open frames of zstd (for eg: zstd compressed eventLog + // Reading). By default `isContinuous` is false, and when we try to read from open frames, + // `compressedInputStream` method above throws truncated error exception. This method set + // `isContinuous` true to allow reading from open frames. + new BufferedInputStream(new ZstdInputStream(s).setContinuous(true), bufferSize) + } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala index 5f697fe99258..069a91f1a8fc 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala @@ -402,7 +402,7 @@ private[spark] object EventLoggingListener extends Logging { val codec = codecName(log).map { c => codecMap.getOrElseUpdate(c, CompressionCodec.createCodec(new SparkConf, c)) } - codec.map(_.compressedInputStream(in)).getOrElse(in) + codec.map(_.compressedContinuousInputStream(in)).getOrElse(in) } catch { case e: Throwable => in.close() diff --git a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala index 4c6b0c1227b1..226c23733c87 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala @@ -118,8 +118,6 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging { case e: HaltReplayException => // Just stop replay. case _: EOFException if maybeTruncated => - case _: IOException if maybeTruncated => - logWarning(s"Failed to read Spark event log: $sourceName") case ioe: IOException => throw ioe case e: Exception => From 403c8d5a60b2712561044e320d6f9233ed3172bf Mon Sep 17 00:00:00 2001 From: 10087686 Date: Sun, 9 Dec 2018 22:44:41 -0800 Subject: [PATCH 0097/1072] [SPARK-26287][CORE] Don't need to create an empty spill file when memory has no records ## What changes were proposed in this pull request? If there are no records in memory, then we don't need to create an empty temp spill file. ## How was this patch tested? Existing tests (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23225 from wangjiaochun/ShufflSorter. Authored-by: 10087686 Signed-off-by: Dongjoon Hyun --- .../spark/shuffle/sort/ShuffleExternalSorter.java | 13 +++++++++---- .../shuffle/sort/UnsafeShuffleWriterSuite.java | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java index 6ee9d5f0eec3..dc43215373e1 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java @@ -145,6 +145,15 @@ final class ShuffleExternalSorter extends MemoryConsumer { */ private void writeSortedFile(boolean isLastFile) { + // This call performs the actual sort. 
+ final ShuffleInMemorySorter.ShuffleSorterIterator sortedRecords = + inMemSorter.getSortedIterator(); + + // If there are no sorted records, so we don't need to create an empty spill file. + if (!sortedRecords.hasNext()) { + return; + } + final ShuffleWriteMetricsReporter writeMetricsToUse; if (isLastFile) { @@ -157,10 +166,6 @@ private void writeSortedFile(boolean isLastFile) { writeMetricsToUse = new ShuffleWriteMetrics(); } - // This call performs the actual sort. - final ShuffleInMemorySorter.ShuffleSorterIterator sortedRecords = - inMemSorter.getSortedIterator(); - // Small writes to DiskBlockObjectWriter will be fairly inefficient. Since there doesn't seem to // be an API to directly transfer bytes from managed memory to the disk writer, we buffer // data through a byte array. This array does not need to be large enough to hold a single diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java index 30ad3f557554..aa5082f1ac7f 100644 --- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java @@ -235,6 +235,7 @@ public void writeEmptyIterator() throws Exception { final Option mapStatus = writer.stop(true); assertTrue(mapStatus.isDefined()); assertTrue(mergedOutputFile.exists()); + assertEquals(0, spillFilesCreated.size()); assertArrayEquals(new long[NUM_PARTITITONS], partitionSizesInMergedFile); assertEquals(0, taskMetrics.shuffleWriteMetrics().recordsWritten()); assertEquals(0, taskMetrics.shuffleWriteMetrics().bytesWritten()); From 3bc83de3cce86a06c275c86b547a99afd781761f Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 10 Dec 2018 14:57:20 +0800 Subject: [PATCH 0098/1072] [SPARK-26307][SQL] Fix CTAS when INSERT a partitioned table using Hive serde ## What changes were proposed in this pull request? This is a Spark 2.3 regression introduced in https://github.com/apache/spark/pull/20521. We should add the partition info for InsertIntoHiveTable in CreateHiveTableAsSelectCommand. Otherwise, we will hit the following error by running the newly added test case: ``` [info] - CTAS: INSERT a partitioned table using Hive serde *** FAILED *** (829 milliseconds) [info] org.apache.spark.SparkException: Requested partitioning does not match the tab1 table: [info] Requested partitions: [info] Table partitions: part [info] at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:179) [info] at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:107) ``` ## How was this patch tested? Added a test case. Closes #23255 from gatorsmile/fixCTAS. 
Authored-by: gatorsmile Signed-off-by: Wenchen Fan --- .../execution/CreateHiveTableAsSelectCommand.scala | 4 +++- .../scala/org/apache/spark/sql/hive/InsertSuite.scala | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala index 630bea5161f1..fd1e931ee0c7 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala @@ -57,9 +57,11 @@ case class CreateHiveTableAsSelectCommand( return Seq.empty } + // For CTAS, there is no static partition values to insert. + val partition = tableDesc.partitionColumnNames.map(_ -> None).toMap InsertIntoHiveTable( tableDesc, - Map.empty, + partition, query, overwrite = false, ifPartitionNotExists = false, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala index 5879748d05b2..510de3a7eab5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala @@ -752,6 +752,17 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter } } + test("SPARK-26307: CTAS - INSERT a partitioned table using Hive serde") { + withTable("tab1") { + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + val df = Seq(("a", 100)).toDF("part", "id") + df.write.format("hive").partitionBy("part").mode("overwrite").saveAsTable("tab1") + df.write.format("hive").partitionBy("part").mode("append").saveAsTable("tab1") + } + } + } + + Seq("LOCAL", "").foreach { local => Seq(true, false).foreach { caseSensitivity => Seq("orc", "parquet").foreach { format => From c8ac6ae84c8a3cc7ed787155a84cbeb56c78a048 Mon Sep 17 00:00:00 2001 From: Darcy Shen Date: Mon, 10 Dec 2018 22:26:28 +0800 Subject: [PATCH 0099/1072] [SPARK-26319][SQL][TEST] Add appendReadColumns Unit Test for HiveShimSuite ## What changes were proposed in this pull request? Add appendReadColumns Unit Test for HiveShimSuite. ## How was this patch tested? ``` $ build/sbt > project hive > testOnly *HiveShimSuite ``` Closes #23268 from sadhen/refactor/hiveshim. Authored-by: Darcy Shen Signed-off-by: Hyukjin Kwon --- .../apache/spark/sql/hive/HiveShimSuite.scala | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala new file mode 100644 index 000000000000..a716f739b5c2 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.hive + +import scala.collection.JavaConverters._ +import scala.language.implicitConversions + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils + +import org.apache.spark.SparkFunSuite + +class HiveShimSuite extends SparkFunSuite { + + test("appendReadColumns") { + val conf = new Configuration + val ids = Seq(1, 2, 3).map(Int.box) + val names = Seq("a", "b", "c") + val moreIds = Seq(4, 5).map(Int.box) + val moreNames = Seq("d", "e") + + // test when READ_COLUMN_NAMES_CONF_STR is empty + HiveShim.appendReadColumns(conf, ids, names) + assert(names.asJava === ColumnProjectionUtils.getReadColumnNames(conf)) + + // test when READ_COLUMN_NAMES_CONF_STR is non-empty + HiveShim.appendReadColumns(conf, moreIds, moreNames) + assert((names ++ moreNames).asJava === ColumnProjectionUtils.getReadColumnNames(conf)) + } +} From 42e8c381b15dd48c2f00c088c897ebdd25405aef Mon Sep 17 00:00:00 2001 From: 10087686 Date: Mon, 10 Dec 2018 22:28:26 +0800 Subject: [PATCH 0100/1072] [SPARK-26286][TEST] Add MAXIMUM_PAGE_SIZE_BYTES exception bound unit test ## What changes were proposed in this pull request? Add MAXIMUM_PAGE_SIZE_BYTES Exception test ## How was this patch tested? Existing tests (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23226 from wangjiaochun/BytesToBytesMapSuite. Authored-by: 10087686 Signed-off-by: Hyukjin Kwon --- .../unsafe/map/AbstractBytesToBytesMapSuite.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java index aa29232e73e1..a11cd535b547 100644 --- a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java +++ b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java @@ -622,6 +622,17 @@ public void initialCapacityBoundsChecking() { } catch (IllegalArgumentException e) { // expected exception } + + try { + new BytesToBytesMap( + taskMemoryManager, + 1, + TaskMemoryManager.MAXIMUM_PAGE_SIZE_BYTES + 1); + Assert.fail("Expected IllegalArgumentException to be thrown"); + } catch (IllegalArgumentException e) { + // expected exception + } + } @Test From 9794923272c26ee5ba760a57718a368c33d09f04 Mon Sep 17 00:00:00 2001 From: liuxian Date: Mon, 10 Dec 2018 22:37:17 +0800 Subject: [PATCH 0101/1072] [MINOR][DOC] Update the condition description of serialized shuffle ## What changes were proposed in this pull request? `1. The shuffle dependency specifies no aggregation or output ordering.` If the shuffle dependency specifies aggregation, but it only aggregates at the reduce-side, serialized shuffle can still be used. `3. 
The shuffle produces fewer than 16777216 output partitions.` If the number of output partitions is 16777216 , we can use serialized shuffle. We can see this mothod: `canUseSerializedShuffle` ## How was this patch tested? N/A Closes #23228 from 10110346/SerializedShuffle_doc. Authored-by: liuxian Signed-off-by: Wenchen Fan --- .../org/apache/spark/shuffle/sort/SortShuffleManager.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index b51a843a31c3..b59fa8e8a3cc 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -33,10 +33,10 @@ import org.apache.spark.shuffle._ * Sort-based shuffle has two different write paths for producing its map output files: * * - Serialized sorting: used when all three of the following conditions hold: - * 1. The shuffle dependency specifies no aggregation or output ordering. + * 1. The shuffle dependency specifies no map-side combine. * 2. The shuffle serializer supports relocation of serialized values (this is currently * supported by KryoSerializer and Spark SQL's custom serializers). - * 3. The shuffle produces fewer than 16777216 output partitions. + * 3. The shuffle produces fewer than or equal to 16777216 output partitions. * - Deserialized sorting: used to handle all other cases. * * ----------------------- From 0bf6c77141e40cc636351c5e77194bb75144bb12 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Mon, 10 Dec 2018 10:27:04 -0600 Subject: [PATCH 0102/1072] This tests pushing to gitbox From b1a724b468d5c1c4aee2a22ffc6d8edac537c3d6 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Mon, 10 Dec 2018 11:11:06 -0600 Subject: [PATCH 0103/1072] This tests pushing directly to github From 90c77ea3132d0b7a12c316bd42fb8d0f59bee253 Mon Sep 17 00:00:00 2001 From: Reza Safi Date: Mon, 10 Dec 2018 11:14:11 -0600 Subject: [PATCH 0104/1072] [SPARK-24958][CORE] Add memory from procfs to executor metrics. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds the entire memory used by spark’s executor (as measured by procfs) to the executor metrics. The memory usage is collected from the entire process tree under the executor. The metrics are subdivided into memory used by java, by python, and by other processes, to aid users in diagnosing the source of high memory usage. The additional metrics are sent to the driver in heartbeats, using the mechanism introduced by SPARK-23429. This also slightly extends that approach to allow one ExecutorMetricType to collect multiple metrics. Added unit tests and also tested on a live cluster. Closes #22612 from rezasafi/ptreememory2. 
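
The per-process numbers come from /proc/<pid>/stat: field 23 is the virtual memory size in bytes and field 24 is the resident set size in pages (see proc(5)), so RSS must be multiplied by the system page size. A minimal, self-contained sketch of that parsing follows; the hard-coded 4096-byte page size, the naive split on spaces, and the object name are assumptions for illustration only (the patch itself queries `getconf PAGESIZE` and handles errors):

```
// Sketch only: turns one /proc/<pid>/stat line into memory numbers.
// After splitting on spaces, index 22 is vsize in bytes and index 23 is rss
// in pages; rss is converted to bytes with the page size.
object ProcStatSketch {
  final case class MemSnapshot(vsizeBytes: Long, rssBytes: Long)

  def parseStatLine(statLine: String, pageSizeBytes: Long): MemSnapshot = {
    val fields = statLine.split(" ")
    MemSnapshot(
      vsizeBytes = fields(22).toLong,
      rssBytes = fields(23).toLong * pageSizeBytes)
  }

  def main(args: Array[String]): Unit = {
    // Shortened stat-style line (prefix of the 26109/stat test resource);
    // only the first 24 fields matter here.
    val sample = "26109 (java) S 1 26107 5788 0 -1 1077944320 75354 0 0 0 " +
      "572 52 0 0 20 0 34 0 4355257 4769947648 64114"
    println(parseStatLine(sample, pageSizeBytes = 4096L))
  }
}
```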
Authored-by: Reza Safi Signed-off-by: Imran Rashid --- .../scala/org/apache/spark/Heartbeater.scala | 11 +- .../spark/executor/ExecutorMetrics.scala | 23 +- .../spark/executor/ProcfsMetricsGetter.scala | 228 ++++++++++++++++++ .../spark/internal/config/package.scala | 5 + .../spark/metrics/ExecutorMetricType.scala | 74 +++++- .../org/apache/spark/status/api/v1/api.scala | 6 +- .../org/apache/spark/util/JsonProtocol.scala | 16 +- .../application_list_json_expectation.json | 15 ++ .../completed_app_list_json_expectation.json | 15 ++ ...ith_executor_metrics_json_expectation.json | 40 ++- ...process_tree_metrics_json_expectation.json | 98 ++++++++ .../limit_app_list_json_expectation.json | 30 +-- .../minDate_app_list_json_expectation.json | 15 ++ .../minEndDate_app_list_json_expectation.json | 15 ++ .../test/resources/ProcfsMetrics/22763/stat | 1 + .../test/resources/ProcfsMetrics/26109/stat | 1 + .../application_1538416563558_0014 | 190 +++++++++++++++ .../deploy/history/HistoryServerSuite.scala | 3 + .../executor/ProcfsMetricsGetterSuite.scala | 41 ++++ .../scheduler/EventLoggingListenerSuite.scala | 85 ++++--- .../spark/status/AppStatusListenerSuite.scala | 74 ++++-- .../apache/spark/util/JsonProtocolSuite.scala | 46 ++-- dev/.rat-excludes | 2 + 23 files changed, 901 insertions(+), 133 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala create mode 100644 core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_process_tree_metrics_json_expectation.json create mode 100644 core/src/test/resources/ProcfsMetrics/22763/stat create mode 100644 core/src/test/resources/ProcfsMetrics/26109/stat create mode 100644 core/src/test/resources/spark-events/application_1538416563558_0014 create mode 100644 core/src/test/scala/org/apache/spark/executor/ProcfsMetricsGetterSuite.scala diff --git a/core/src/main/scala/org/apache/spark/Heartbeater.scala b/core/src/main/scala/org/apache/spark/Heartbeater.scala index 84091eef0430..1012755e068d 100644 --- a/core/src/main/scala/org/apache/spark/Heartbeater.scala +++ b/core/src/main/scala/org/apache/spark/Heartbeater.scala @@ -61,10 +61,17 @@ private[spark] class Heartbeater( /** * Get the current executor level metrics. 
These are returned as an array, with the index - * determined by ExecutorMetricType.values + * determined by ExecutorMetricType.metricToOffset */ def getCurrentMetrics(): ExecutorMetrics = { - val metrics = ExecutorMetricType.values.map(_.getMetricValue(memoryManager)).toArray + + val metrics = new Array[Long](ExecutorMetricType.numMetrics) + var offset = 0 + ExecutorMetricType.metricGetters.foreach { metric => + val newMetrics = metric.getMetricValues(memoryManager) + Array.copy(newMetrics, 0, metrics, offset, newMetrics.size) + offset += newMetrics.length + } new ExecutorMetrics(metrics) } } diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala index 1befd27de1cb..f19ac813fde3 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala @@ -27,17 +27,15 @@ import org.apache.spark.metrics.ExecutorMetricType */ @DeveloperApi class ExecutorMetrics private[spark] extends Serializable { - - // Metrics are indexed by ExecutorMetricType.values - private val metrics = new Array[Long](ExecutorMetricType.values.length) - + // Metrics are indexed by ExecutorMetricType.metricToOffset + private val metrics = new Array[Long](ExecutorMetricType.numMetrics) // the first element is initialized to -1, indicating that the values for the array // haven't been set yet. metrics(0) = -1 - /** Returns the value for the specified metricType. */ - def getMetricValue(metricType: ExecutorMetricType): Long = { - metrics(ExecutorMetricType.metricIdxMap(metricType)) + /** Returns the value for the specified metric. */ + def getMetricValue(metricName: String): Long = { + metrics(ExecutorMetricType.metricToOffset(metricName)) } /** Returns true if the values for the metrics have been set, false otherwise. */ @@ -49,14 +47,14 @@ class ExecutorMetrics private[spark] extends Serializable { } /** - * Constructor: create the ExecutorMetrics with the values specified. + * Constructor: create the ExecutorMetrics with using a given map. * * @param executorMetrics map of executor metric name to value */ private[spark] def this(executorMetrics: Map[String, Long]) { this() - (0 until ExecutorMetricType.values.length).foreach { idx => - metrics(idx) = executorMetrics.getOrElse(ExecutorMetricType.values(idx).name, 0L) + ExecutorMetricType.metricToOffset.foreach { case(name, idx) => + metrics(idx) = executorMetrics.getOrElse(name, 0L) } } @@ -69,9 +67,8 @@ class ExecutorMetrics private[spark] extends Serializable { */ private[spark] def compareAndUpdatePeakValues(executorMetrics: ExecutorMetrics): Boolean = { var updated = false - - (0 until ExecutorMetricType.values.length).foreach { idx => - if (executorMetrics.metrics(idx) > metrics(idx)) { + (0 until ExecutorMetricType.numMetrics).foreach { idx => + if (executorMetrics.metrics(idx) > metrics(idx)) { updated = true metrics(idx) = executorMetrics.metrics(idx) } diff --git a/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala b/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala new file mode 100644 index 000000000000..af67f41e94af --- /dev/null +++ b/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.executor + +import java.io._ +import java.nio.charset.Charset +import java.nio.file.{Files, Paths} +import java.util.Locale + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer +import scala.util.Try + +import org.apache.spark.{SparkEnv, SparkException} +import org.apache.spark.internal.{config, Logging} +import org.apache.spark.util.Utils + + +private[spark] case class ProcfsMetrics( + jvmVmemTotal: Long, + jvmRSSTotal: Long, + pythonVmemTotal: Long, + pythonRSSTotal: Long, + otherVmemTotal: Long, + otherRSSTotal: Long) + +// Some of the ideas here are taken from the ProcfsBasedProcessTree class in hadoop +// project. +private[spark] class ProcfsMetricsGetter(procfsDir: String = "/proc/") extends Logging { + private val procfsStatFile = "stat" + private val testing = sys.env.contains("SPARK_TESTING") || sys.props.contains("spark.testing") + private val pageSize = computePageSize() + private var isAvailable: Boolean = isProcfsAvailable + private val pid = computePid() + + private lazy val isProcfsAvailable: Boolean = { + if (testing) { + true + } + else { + val procDirExists = Try(Files.exists(Paths.get(procfsDir))).recover { + case ioe: IOException => + logWarning("Exception checking for procfs dir", ioe) + false + } + val shouldLogStageExecutorMetrics = + SparkEnv.get.conf.get(config.EVENT_LOG_STAGE_EXECUTOR_METRICS) + val shouldLogStageExecutorProcessTreeMetrics = + SparkEnv.get.conf.get(config.EVENT_LOG_PROCESS_TREE_METRICS) + procDirExists.get && shouldLogStageExecutorProcessTreeMetrics && shouldLogStageExecutorMetrics + } + } + + private def computePid(): Int = { + if (!isAvailable || testing) { + return -1; + } + try { + // This can be simplified in java9: + // https://docs.oracle.com/javase/9/docs/api/java/lang/ProcessHandle.html + val cmd = Array("bash", "-c", "echo $PPID") + val out = Utils.executeAndGetOutput(cmd) + Integer.parseInt(out.split("\n")(0)) + } + catch { + case e: SparkException => + logWarning("Exception when trying to compute process tree." 
+ + " As a result reporting of ProcessTree metrics is stopped", e) + isAvailable = false + -1 + } + } + + private def computePageSize(): Long = { + if (testing) { + return 4096; + } + try { + val cmd = Array("getconf", "PAGESIZE") + val out = Utils.executeAndGetOutput(cmd) + Integer.parseInt(out.split("\n")(0)) + } catch { + case e: Exception => + logWarning("Exception when trying to compute pagesize, as a" + + " result reporting of ProcessTree metrics is stopped") + isAvailable = false + 0 + } + } + + private def computeProcessTree(): Set[Int] = { + if (!isAvailable || testing) { + return Set() + } + var ptree: Set[Int] = Set() + ptree += pid + val queue = mutable.Queue.empty[Int] + queue += pid + while ( !queue.isEmpty ) { + val p = queue.dequeue() + val c = getChildPids(p) + if (!c.isEmpty) { + queue ++= c + ptree ++= c.toSet + } + } + ptree + } + + private def getChildPids(pid: Int): ArrayBuffer[Int] = { + try { + val builder = new ProcessBuilder("pgrep", "-P", pid.toString) + val process = builder.start() + val childPidsInInt = mutable.ArrayBuffer.empty[Int] + def appendChildPid(s: String): Unit = { + if (s != "") { + logTrace("Found a child pid:" + s) + childPidsInInt += Integer.parseInt(s) + } + } + val stdoutThread = Utils.processStreamByLine("read stdout for pgrep", + process.getInputStream, appendChildPid) + val errorStringBuilder = new StringBuilder() + val stdErrThread = Utils.processStreamByLine( + "stderr for pgrep", + process.getErrorStream, + line => errorStringBuilder.append(line)) + val exitCode = process.waitFor() + stdoutThread.join() + stdErrThread.join() + val errorString = errorStringBuilder.toString() + // pgrep will have exit code of 1 if there are more than one child process + // and it will have a exit code of 2 if there is no child process + if (exitCode != 0 && exitCode > 2) { + val cmd = builder.command().toArray.mkString(" ") + logWarning(s"Process $cmd exited with code $exitCode and stderr: $errorString") + throw new SparkException(s"Process $cmd exited with code $exitCode") + } + childPidsInInt + } catch { + case e: Exception => + logWarning("Exception when trying to compute process tree." 
+ + " As a result reporting of ProcessTree metrics is stopped.", e) + isAvailable = false + mutable.ArrayBuffer.empty[Int] + } + } + + def addProcfsMetricsFromOneProcess( + allMetrics: ProcfsMetrics, + pid: Int): ProcfsMetrics = { + + // The computation of RSS and Vmem are based on proc(5): + // http://man7.org/linux/man-pages/man5/proc.5.html + try { + val pidDir = new File(procfsDir, pid.toString) + def openReader(): BufferedReader = { + val f = new File(new File(procfsDir, pid.toString), procfsStatFile) + new BufferedReader(new InputStreamReader(new FileInputStream(f), Charset.forName("UTF-8"))) + } + Utils.tryWithResource(openReader) { in => + val procInfo = in.readLine + val procInfoSplit = procInfo.split(" ") + val vmem = procInfoSplit(22).toLong + val rssMem = procInfoSplit(23).toLong * pageSize + if (procInfoSplit(1).toLowerCase(Locale.US).contains("java")) { + allMetrics.copy( + jvmVmemTotal = allMetrics.jvmVmemTotal + vmem, + jvmRSSTotal = allMetrics.jvmRSSTotal + (rssMem) + ) + } + else if (procInfoSplit(1).toLowerCase(Locale.US).contains("python")) { + allMetrics.copy( + pythonVmemTotal = allMetrics.pythonVmemTotal + vmem, + pythonRSSTotal = allMetrics.pythonRSSTotal + (rssMem) + ) + } + else { + allMetrics.copy( + otherVmemTotal = allMetrics.otherVmemTotal + vmem, + otherRSSTotal = allMetrics.otherRSSTotal + (rssMem) + ) + } + } + } catch { + case f: IOException => + logWarning("There was a problem with reading" + + " the stat file of the process. ", f) + ProcfsMetrics(0, 0, 0, 0, 0, 0) + } + } + + private[spark] def computeAllMetrics(): ProcfsMetrics = { + if (!isAvailable) { + return ProcfsMetrics(0, 0, 0, 0, 0, 0) + } + val pids = computeProcessTree + var allMetrics = ProcfsMetrics(0, 0, 0, 0, 0, 0) + for (p <- pids) { + allMetrics = addProcfsMetricsFromOneProcess(allMetrics, p) + // if we had an error getting any of the metrics, we don't want to report partial metrics, as + // that would be misleading. 
+ if (!isAvailable) { + return ProcfsMetrics(0, 0, 0, 0, 0, 0) + } + } + allMetrics + } +} + +private[spark] object ProcfsMetricsGetter { + final val pTreeInfo = new ProcfsMetricsGetter +} diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 646b3881a79b..85bb557abef5 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -93,6 +93,11 @@ package object config { .booleanConf .createWithDefault(false) + private[spark] val EVENT_LOG_PROCESS_TREE_METRICS = + ConfigBuilder("spark.eventLog.logStageExecutorProcessTreeMetrics.enabled") + .booleanConf + .createWithDefault(false) + private[spark] val EVENT_LOG_OVERWRITE = ConfigBuilder("spark.eventLog.overwrite").booleanConf.createWithDefault(false) diff --git a/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala b/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala index cd10dad25e87..704b36d3118b 100644 --- a/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala +++ b/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala @@ -19,25 +19,43 @@ package org.apache.spark.metrics import java.lang.management.{BufferPoolMXBean, ManagementFactory} import javax.management.ObjectName +import scala.collection.mutable + +import org.apache.spark.executor.ProcfsMetricsGetter import org.apache.spark.memory.MemoryManager /** * Executor metric types for executor-level metrics stored in ExecutorMetrics. */ sealed trait ExecutorMetricType { + private[spark] def getMetricValues(memoryManager: MemoryManager): Array[Long] + private[spark] def names: Seq[String] +} + +sealed trait SingleValueExecutorMetricType extends ExecutorMetricType { + override private[spark] def names = { + Seq(getClass().getName(). 
+ stripSuffix("$").split("""\.""").last) + } + + override private[spark] def getMetricValues(memoryManager: MemoryManager): Array[Long] = { + val metrics = new Array[Long](1) + metrics(0) = getMetricValue(memoryManager) + metrics + } + private[spark] def getMetricValue(memoryManager: MemoryManager): Long - private[spark] val name = getClass().getName().stripSuffix("$").split("""\.""").last } private[spark] abstract class MemoryManagerExecutorMetricType( - f: MemoryManager => Long) extends ExecutorMetricType { + f: MemoryManager => Long) extends SingleValueExecutorMetricType { override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = { f(memoryManager) } } private[spark] abstract class MBeanExecutorMetricType(mBeanName: String) - extends ExecutorMetricType { + extends SingleValueExecutorMetricType { private val bean = ManagementFactory.newPlatformMXBeanProxy( ManagementFactory.getPlatformMBeanServer, new ObjectName(mBeanName).toString, classOf[BufferPoolMXBean]) @@ -47,18 +65,40 @@ private[spark] abstract class MBeanExecutorMetricType(mBeanName: String) } } -case object JVMHeapMemory extends ExecutorMetricType { +case object JVMHeapMemory extends SingleValueExecutorMetricType { override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = { ManagementFactory.getMemoryMXBean.getHeapMemoryUsage().getUsed() } } -case object JVMOffHeapMemory extends ExecutorMetricType { +case object JVMOffHeapMemory extends SingleValueExecutorMetricType { override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = { ManagementFactory.getMemoryMXBean.getNonHeapMemoryUsage().getUsed() } } +case object ProcessTreeMetrics extends ExecutorMetricType { + override val names = Seq( + "ProcessTreeJVMVMemory", + "ProcessTreeJVMRSSMemory", + "ProcessTreePythonVMemory", + "ProcessTreePythonRSSMemory", + "ProcessTreeOtherVMemory", + "ProcessTreeOtherRSSMemory") + + override private[spark] def getMetricValues(memoryManager: MemoryManager): Array[Long] = { + val allMetrics = ProcfsMetricsGetter.pTreeInfo.computeAllMetrics() + val processTreeMetrics = new Array[Long](names.length) + processTreeMetrics(0) = allMetrics.jvmVmemTotal + processTreeMetrics(1) = allMetrics.jvmRSSTotal + processTreeMetrics(2) = allMetrics.pythonVmemTotal + processTreeMetrics(3) = allMetrics.pythonRSSTotal + processTreeMetrics(4) = allMetrics.otherVmemTotal + processTreeMetrics(5) = allMetrics.otherRSSTotal + processTreeMetrics + } +} + case object OnHeapExecutionMemory extends MemoryManagerExecutorMetricType( _.onHeapExecutionMemoryUsed) @@ -84,8 +124,9 @@ case object MappedPoolMemory extends MBeanExecutorMetricType( "java.nio:type=BufferPool,name=mapped") private[spark] object ExecutorMetricType { - // List of all executor metric types - val values = IndexedSeq( + + // List of all executor metric getters + val metricGetters = IndexedSeq( JVMHeapMemory, JVMOffHeapMemory, OnHeapExecutionMemory, @@ -95,10 +136,21 @@ private[spark] object ExecutorMetricType { OnHeapUnifiedMemory, OffHeapUnifiedMemory, DirectPoolMemory, - MappedPoolMemory + MappedPoolMemory, + ProcessTreeMetrics ) - // Map of executor metric type to its index in values. 
- val metricIdxMap = - Map[ExecutorMetricType, Int](ExecutorMetricType.values.zipWithIndex: _*) + + val (metricToOffset, numMetrics) = { + var numberOfMetrics = 0 + val definedMetricsAndOffset = mutable.LinkedHashMap.empty[String, Int] + metricGetters.foreach { m => + var metricInSet = 0 + (0 until m.names.length).foreach { idx => + definedMetricsAndOffset += (m.names(idx) -> (idx + numberOfMetrics)) + } + numberOfMetrics += m.names.length + } + (definedMetricsAndOffset, numberOfMetrics) + } } diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala index aa21da2b66ab..c7d3cd37db6f 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -133,9 +133,9 @@ private[spark] class ExecutorMetricsJsonSerializer jsonGenerator: JsonGenerator, serializerProvider: SerializerProvider): Unit = { metrics.foreach { m: ExecutorMetrics => - val metricsMap = ExecutorMetricType.values.map { metricType => - metricType.name -> m.getMetricValue(metricType) - }.toMap + val metricsMap = ExecutorMetricType.metricToOffset.map { case (metric, _) => + metric -> m.getMetricValue(metric) + } jsonGenerator.writeObject(metricsMap) } } diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala index 0cd8612b8fd1..348291fe5e7a 100644 --- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala @@ -394,10 +394,10 @@ private[spark] object JsonProtocol { /** Convert executor metrics to JSON. */ def executorMetricsToJson(executorMetrics: ExecutorMetrics): JValue = { - val metrics = ExecutorMetricType.values.map{ metricType => - JField(metricType.name, executorMetrics.getMetricValue(metricType)) - } - JObject(metrics: _*) + val metrics = ExecutorMetricType.metricToOffset.map { case (m, _) => + JField(m, executorMetrics.getMetricValue(m)) + } + JObject(metrics.toSeq: _*) } def taskEndReasonToJson(taskEndReason: TaskEndReason): JValue = { @@ -611,10 +611,10 @@ private[spark] object JsonProtocol { /** Extract the executor metrics from JSON. 
*/ def executorMetricsFromJson(json: JValue): ExecutorMetrics = { val metrics = - ExecutorMetricType.values.map { metric => - metric.name -> jsonOption(json \ metric.name).map(_.extract[Long]).getOrElse(0L) - }.toMap - new ExecutorMetrics(metrics) + ExecutorMetricType.metricToOffset.map { case (metric, _) => + metric -> jsonOption(json \ metric).map(_.extract[Long]).getOrElse(0L) + } + new ExecutorMetrics(metrics.toMap) } def taskEndFromJson(json: JValue): SparkListenerTaskEnd = { diff --git a/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json index eea6f595efd2..0f0ccf9858a3 100644 --- a/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json @@ -1,4 +1,19 @@ [ { + "id" : "application_1538416563558_0014", + "name" : "PythonBisectingKMeansExample", + "attempts" : [ { + "startTime" : "2018-10-02T00:42:39.580GMT", + "endTime" : "2018-10-02T00:44:02.338GMT", + "lastUpdated" : "", + "duration" : 82758, + "sparkUser" : "root", + "completed" : true, + "appSparkVersion" : "2.5.0-SNAPSHOT", + "lastUpdatedEpoch" : 0, + "startTimeEpoch" : 1538440959580, + "endTimeEpoch" : 1538441042338 + } ] +}, { "id" : "application_1506645932520_24630151", "name" : "Spark shell", "attempts" : [ { diff --git a/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json index 7bc7f31be097..e136a35a1e3a 100644 --- a/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json @@ -1,4 +1,19 @@ [ { + "id" : "application_1538416563558_0014", + "name" : "PythonBisectingKMeansExample", + "attempts" : [ { + "startTime" : "2018-10-02T00:42:39.580GMT", + "endTime" : "2018-10-02T00:44:02.338GMT", + "lastUpdated" : "", + "duration" : 82758, + "sparkUser" : "root", + "completed" : true, + "appSparkVersion" : "2.5.0-SNAPSHOT", + "lastUpdatedEpoch" : 0, + "startTimeEpoch" : 1538440959580, + "endTimeEpoch" : 1538441042338 + } ] +}, { "id" : "application_1506645932520_24630151", "name" : "Spark shell", "attempts" : [ { diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json index 9bf2086cc8e7..75674778dd1f 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json @@ -37,7 +37,13 @@ "DirectPoolMemory" : 397602, "MappedPoolMemory" : 0, "JVMHeapMemory" : 629553808, - "OffHeapStorageMemory" : 0 + "OffHeapStorageMemory" : 0, + "ProcessTreeJVMVMemory": 0, + "ProcessTreeJVMRSSMemory": 0, + "ProcessTreePythonVMemory": 0, + "ProcessTreePythonRSSMemory": 0, + "ProcessTreeOtherVMemory": 0, + "ProcessTreeOtherRSSMemory": 0 } }, { "id" : "7", @@ -177,7 +183,13 @@ "DirectPoolMemory" : 126261, "MappedPoolMemory" : 0, "JVMHeapMemory" : 518613056, - "OffHeapStorageMemory" : 0 + "OffHeapStorageMemory" : 0, + "ProcessTreeJVMVMemory": 0, + "ProcessTreeJVMRSSMemory": 0, + "ProcessTreePythonVMemory": 0, + 
"ProcessTreePythonRSSMemory": 0, + "ProcessTreeOtherVMemory": 0, + "ProcessTreeOtherRSSMemory": 0 } }, { "id" : "3", @@ -221,7 +233,13 @@ "DirectPoolMemory" : 87796, "MappedPoolMemory" : 0, "JVMHeapMemory" : 726805712, - "OffHeapStorageMemory" : 0 + "OffHeapStorageMemory" : 0, + "ProcessTreeJVMVMemory": 0, + "ProcessTreeJVMRSSMemory": 0, + "ProcessTreePythonVMemory": 0, + "ProcessTreePythonRSSMemory": 0, + "ProcessTreeOtherVMemory": 0, + "ProcessTreeOtherRSSMemory": 0 } }, { "id" : "2", @@ -265,7 +283,13 @@ "DirectPoolMemory" : 87796, "MappedPoolMemory" : 0, "JVMHeapMemory" : 595946552, - "OffHeapStorageMemory" : 0 + "OffHeapStorageMemory" : 0, + "ProcessTreeJVMVMemory": 0, + "ProcessTreeJVMRSSMemory": 0, + "ProcessTreePythonVMemory": 0, + "ProcessTreePythonRSSMemory": 0, + "ProcessTreeOtherVMemory": 0, + "ProcessTreeOtherRSSMemory": 0 } }, { "id" : "1", @@ -309,6 +333,12 @@ "DirectPoolMemory" : 98230, "MappedPoolMemory" : 0, "JVMHeapMemory" : 755008624, - "OffHeapStorageMemory" : 0 + "OffHeapStorageMemory" : 0, + "ProcessTreeJVMVMemory": 0, + "ProcessTreeJVMRSSMemory": 0, + "ProcessTreePythonVMemory": 0, + "ProcessTreePythonRSSMemory": 0, + "ProcessTreeOtherVMemory": 0, + "ProcessTreeOtherRSSMemory": 0 } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_process_tree_metrics_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_process_tree_metrics_json_expectation.json new file mode 100644 index 000000000000..69efefe736dd --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_process_tree_metrics_json_expectation.json @@ -0,0 +1,98 @@ +[ { + "id" : "driver", + "hostPort" : "rezamemory-1.gce.something.com:43959", + "isActive" : true, + "rddBlocks" : 0, + "memoryUsed" : 0, + "diskUsed" : 0, + "totalCores" : 0, + "maxTasks" : 0, + "activeTasks" : 0, + "failedTasks" : 0, + "completedTasks" : 0, + "totalTasks" : 0, + "totalDuration" : 0, + "totalGCTime" : 0, + "totalInputBytes" : 0, + "totalShuffleRead" : 0, + "totalShuffleWrite" : 0, + "isBlacklisted" : false, + "maxMemory" : 384093388, + "addTime" : "2018-10-02T00:42:47.690GMT", + "executorLogs" : { }, + "memoryMetrics" : { + "usedOnHeapStorageMemory" : 0, + "usedOffHeapStorageMemory" : 0, + "totalOnHeapStorageMemory" : 384093388, + "totalOffHeapStorageMemory" : 0 + }, + "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "OnHeapStorageMemory" : 554933, + "JVMOffHeapMemory" : 104976128, + "OffHeapExecutionMemory" : 0, + "OnHeapUnifiedMemory" : 554933, + "OnHeapExecutionMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 228407, + "MappedPoolMemory" : 0, + "JVMHeapMemory" : 350990264, + "OffHeapStorageMemory" : 0, + "ProcessTreeJVMVMemory" : 5067235328, + "ProcessTreeJVMRSSMemory" : 710475776, + "ProcessTreePythonVMemory" : 408375296, + "ProcessTreePythonRSSMemory" : 40284160, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0 + } +}, { + "id" : "9", + "hostPort" : "rezamemory-2.gce.something.com:40797", + "isActive" : true, + "rddBlocks" : 0, + "memoryUsed" : 0, + "diskUsed" : 0, + "totalCores" : 1, + "maxTasks" : 1, + "activeTasks" : 0, + "failedTasks" : 0, + "completedTasks" : 2, + "totalTasks" : 2, + "totalDuration" : 6191, + "totalGCTime" : 288, + "totalInputBytes" : 108, + "totalShuffleRead" : 0, + "totalShuffleWrite" : 0, + "isBlacklisted" : false, + "maxMemory" : 384093388, + "addTime" : "2018-10-02T00:43:56.142GMT", + "executorLogs" : { + "stdout" : 
"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000010/root/stdout?start=-4096", + "stderr" : "http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000010/root/stderr?start=-4096" + }, + "memoryMetrics" : { + "usedOnHeapStorageMemory" : 0, + "usedOffHeapStorageMemory" : 0, + "totalOnHeapStorageMemory" : 384093388, + "totalOffHeapStorageMemory" : 0 + }, + "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "OnHeapStorageMemory" : 1088805, + "JVMOffHeapMemory" : 59006656, + "OffHeapExecutionMemory" : 0, + "OnHeapUnifiedMemory" : 1088805, + "OnHeapExecutionMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 20181, + "MappedPoolMemory" : 0, + "JVMHeapMemory" : 193766856, + "OffHeapStorageMemory" : 0, + "ProcessTreeJVMVMemory" : 3016261632, + "ProcessTreeJVMRSSMemory" : 405860352, + "ProcessTreePythonVMemory" : 625926144, + "ProcessTreePythonRSSMemory" : 69013504, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0 + } +} ] diff --git a/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json index 9e1e65a35881..0ef9377dcb08 100644 --- a/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json @@ -1,4 +1,19 @@ [ { + "id" : "application_1538416563558_0014", + "name" : "PythonBisectingKMeansExample", + "attempts" : [ { + "startTime" : "2018-10-02T00:42:39.580GMT", + "endTime" : "2018-10-02T00:44:02.338GMT", + "lastUpdated" : "", + "duration" : 82758, + "sparkUser" : "root", + "completed" : true, + "appSparkVersion" : "2.5.0-SNAPSHOT", + "lastUpdatedEpoch" : 0, + "startTimeEpoch" : 1538440959580, + "endTimeEpoch" : 1538441042338 + } ] +}, { "id" : "application_1506645932520_24630151", "name" : "Spark shell", "attempts" : [ { @@ -28,19 +43,4 @@ "startTimeEpoch" : 1516300235119, "endTimeEpoch" : 1516300707938 } ] -}, { - "id" : "app-20180109111548-0000", - "name" : "Spark shell", - "attempts" : [ { - "startTime" : "2018-01-09T10:15:42.372GMT", - "endTime" : "2018-01-09T10:24:37.606GMT", - "lastUpdated" : "", - "duration" : 535234, - "sparkUser" : "attilapiros", - "completed" : true, - "appSparkVersion" : "2.3.0-SNAPSHOT", - "lastUpdatedEpoch" : 0, - "startTimeEpoch" : 1515492942372, - "endTimeEpoch" : 1515493477606 - } ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json index 28c6bf1b3e01..ea9dc1b97afc 100644 --- a/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json @@ -1,4 +1,19 @@ [ { + "id" : "application_1538416563558_0014", + "name" : "PythonBisectingKMeansExample", + "attempts" : [ { + "startTime" : "2018-10-02T00:42:39.580GMT", + "endTime" : "2018-10-02T00:44:02.338GMT", + "lastUpdated" : "", + "duration" : 82758, + "sparkUser" : "root", + "completed" : true, + "appSparkVersion" : "2.5.0-SNAPSHOT", + "lastUpdatedEpoch" : 0, + "startTimeEpoch" : 1538440959580, + "endTimeEpoch" : 1538441042338 + } ] +}, { "id" : "application_1506645932520_24630151", "name" : "Spark shell", "attempts" : [ { diff --git 
a/core/src/test/resources/HistoryServerExpectations/minEndDate_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/minEndDate_app_list_json_expectation.json index f547b79f47e1..2a77071a9ffd 100644 --- a/core/src/test/resources/HistoryServerExpectations/minEndDate_app_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/minEndDate_app_list_json_expectation.json @@ -1,4 +1,19 @@ [ { + "id" : "application_1538416563558_0014", + "name" : "PythonBisectingKMeansExample", + "attempts" : [ { + "startTime" : "2018-10-02T00:42:39.580GMT", + "endTime" : "2018-10-02T00:44:02.338GMT", + "lastUpdated" : "", + "duration" : 82758, + "sparkUser" : "root", + "completed" : true, + "appSparkVersion" : "2.5.0-SNAPSHOT", + "lastUpdatedEpoch" : 0, + "startTimeEpoch" : 1538440959580, + "endTimeEpoch" : 1538441042338 + } ] +}, { "id" : "application_1506645932520_24630151", "name" : "Spark shell", "attempts" : [ { diff --git a/core/src/test/resources/ProcfsMetrics/22763/stat b/core/src/test/resources/ProcfsMetrics/22763/stat new file mode 100644 index 000000000000..cea4b713d0ee --- /dev/null +++ b/core/src/test/resources/ProcfsMetrics/22763/stat @@ -0,0 +1 @@ +22763 (python2.7) S 22756 22756 7051 0 -1 1077944384 449 0 0 0 4 3 0 0 20 0 3 0 117445 360595456 1912 18446744073709551615 4194304 4196756 140726192435536 140726192432528 140707465485051 0 0 16781312 2 18446744073709551615 0 0 17 1 0 0 0 0 0 6294976 6295604 38744064 140726192440006 140726192440119 140726192440119 140726192443369 0 \ No newline at end of file diff --git a/core/src/test/resources/ProcfsMetrics/26109/stat b/core/src/test/resources/ProcfsMetrics/26109/stat new file mode 100644 index 000000000000..ae46bfabd047 --- /dev/null +++ b/core/src/test/resources/ProcfsMetrics/26109/stat @@ -0,0 +1 @@ +26109 (java) S 1 26107 5788 0 -1 1077944320 75354 0 0 0 572 52 0 0 20 0 34 0 4355257 4769947648 64114 18446744073709551615 4194304 4196468 140737190381776 140737190364320 139976994791319 0 0 0 16800975 18446744073709551615 0 0 17 2 0 0 0 0 0 6293624 6294260 11276288 140737190385424 140737190414250 140737190414250 140737190416335 0 diff --git a/core/src/test/resources/spark-events/application_1538416563558_0014 b/core/src/test/resources/spark-events/application_1538416563558_0014 new file mode 100644 index 000000000000..000288dbc454 --- /dev/null +++ b/core/src/test/resources/spark-events/application_1538416563558_0014 @@ -0,0 +1,190 @@ +{"Event":"SparkListenerLogStart","Spark Version":"2.5.0-SNAPSHOT"} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"driver","Host":"rezamemory-1.gce.something.com","Port":43959},"Maximum Memory":384093388,"Timestamp":1538440967690,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/usr/java/jdk1.8.0_121/jre","Java Version":"1.8.0_121 (Oracle Corporation)","Scala Version":"version 2.11.12"},"Spark 
Properties":{"spark.serializer":"org.apache.spark.serializer.KryoSerializer","spark.yarn.jars":"local:/opt/some/path/lib/spark2/jars/*","spark.driver.host":"rezamemory-1.gce.something.com","spark.serializer.objectStreamReset":"100","spark.eventLog.enabled":"true","spark.executor.heartbeatInterval":"100ms","spark.hadoop.mapreduce.application.classpath":"","spark.driver.port":"35918","spark.shuffle.service.enabled":"true","spark.rdd.compress":"True","spark.driver.extraLibraryPath":"/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/lib/native","spark.executorEnv.PYTHONPATH":"/opt/some/path/lib/spark2/python/lib/py4j-0.10.7-src.zip/opt/some/path/lib/spark2/python/lib/pyspark.zip","spark.yarn.historyServer.address":"http://rezamemory-1.gce.something.com:18089","spark.app.name":"PythonBisectingKMeansExample","spark.ui.killEnabled":"true","spark.sql.hive.metastore.jars":"${env:HADOOP_COMMON_HOME}/../hive/lib/*:${env:HADOOP_COMMON_HOME}/client/*","spark.dynamicAllocation.schedulerBacklogTimeout":"1","spark.yarn.am.extraLibraryPath":"/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/lib/native","spark.scheduler.mode":"FIFO","spark.eventLog.logStageExecutorMetrics.enabled":"true","spark.yarn.config.gatewayPath":"/opt/cloudera/parcels","spark.executor.id":"driver","spark.yarn.config.replacementPath":"{{HADOOP_COMMON_HOME}}/../../..","spark.eventLog.logStageExecutorProcessTreeMetrics.enabled":"true","spark.submit.deployMode":"client","spark.shuffle.service.port":"7337","spark.master":"yarn","spark.authenticate":"false","spark.ui.filters":"org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter","spark.executor.extraLibraryPath":"/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/lib/native","spark.eventLog.dir":"hdfs://rezamemory-1.gce.something.com:8020/user/spark/spark2ApplicationHistory","spark.dynamicAllocation.enabled":"true","spark.sql.catalogImplementation":"hive","spark.hadoop.yarn.application.classpath":"","spark.driver.appUIAddress":"http://rezamemory-1.gce.something.com:4040","spark.yarn.isPython":"true","spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_HOSTS":"rezamemory-1.gce.something.com","spark.dynamicAllocation.minExecutors":"0","spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES":"http://rezamemory-1.gce.something.com:8088/proxy/application_1538416563558_0014","spark.dynamicAllocation.executorIdleTimeout":"60","spark.app.id":"application_1538416563558_0014","spark.sql.hive.metastore.version":"1.1.0"},"System Properties":{"java.io.tmpdir":"/tmp","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle Corporation","java.vm.specification.version":"1.8","user.home":"/root","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","sun.arch.data.model":"64","sun.boot.library.path":"/usr/java/jdk1.8.0_121/jre/lib/amd64","user.dir":"/","java.library.path":":/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/lib/native:/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/lib/native:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib","sun.cpu.isalist":"","os.arch":"amd64","java.vm.version":"25.121-b13","jetty.git.hash":"unknown","java.endorsed.dirs":"/usr/java/jdk1.8.0_121/jre/lib/endorsed","java.runtime.version":"1.8.0_121-b13","java.vm.info":"mixed 
mode","java.ext.dirs":"/usr/java/jdk1.8.0_121/jre/lib/ext:/usr/java/packages/lib/ext","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"52.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/usr/java/jdk1.8.0_121/jre/lib/resources.jar:/usr/java/jdk1.8.0_121/jre/lib/rt.jar:/usr/java/jdk1.8.0_121/jre/lib/sunrsasign.jar:/usr/java/jdk1.8.0_121/jre/lib/jsse.jar:/usr/java/jdk1.8.0_121/jre/lib/jce.jar:/usr/java/jdk1.8.0_121/jre/lib/charsets.jar:/usr/java/jdk1.8.0_121/jre/lib/jfr.jar:/usr/java/jdk1.8.0_121/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Los_Angeles","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"3.10.0-693.5.2.el7.x86_64","sun.os.patch.level":"unknown","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","user.language":"en","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.print.PSPrinterJob","java.awt.graphicsenv":"sun.awt.X11GraphicsEnvironment","awt.toolkit":"sun.awt.X11.XToolkit","os.name":"Linux","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"root","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --conf spark.executor.heartbeatInterval=100ms --conf spark.eventLog.logStageExecutorProcessTreeMetrics.enabled=true --conf spark.eventLog.logStageExecutorMetrics.enabled=true ./opt/some/path/lib/spark2/examples/src/main/python/mllib/bisecting_k_means_example.py","java.home":"/usr/java/jdk1.8.0_121/jre","java.version":"1.8.0_121","sun.io.unicode.encoding":"UnicodeLittle"},"Classpath Entries":{"/opt/some/path/lib/spark2/jars/apacheds-kerberos-codec-2.0.0-M15.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/netty-3.10.5.Final.jar":"System Classpath","/opt/some/path/lib/spark2/jars/validation-api-1.1.0.Final.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-annotations-2.7.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-azure-datalake-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jackson-jaxrs-1.9.13.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jaxb-impl-2.2.3-1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jasper-compiler-5.5.23.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/logredactor-1.0.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-streaming_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jersey-common-2.22.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jersey-container-servlet-core-2.22.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-collections-3.2.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/guice-servlet-3.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-hadoop-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/RoaringBitmap-0.5.11.jar":"System Classpath","/opt/some/path/lib/spark2/jars/parquet-hadoop-1.10.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/parquet-jackson-1.10.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jersey-server-2.22.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hk2-api-2.4.0-b34.jar":"System 
Classpath","/opt/some/path/lib/spark2/jars/jtransforms-2.4.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/aircompressor-0.10.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-el-1.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/avro-mapred-1.8.2-hadoop2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/minlog-1.3.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-daemon-1.0.13.jar":"System Classpath","/opt/some/path/lib/spark2/jars/kryo-shaded-4.0.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-crypto-1.0.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-mllib-local_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-openstack-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-lang3-3.5.jar":"System Classpath","/opt/some/path/lib/spark2/jars/univocity-parsers-2.7.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/javassist-3.18.1-GA.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-api-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-registry-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/activation-1.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/objenesis-2.5.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/aopalliance-1.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jackson-xc-1.8.8.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-applications-unmanaged-am-launcher-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-repl_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hue-plugins-3.9.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-digester-1.8.jar":"System Classpath","/opt/some/path/lib/spark2/jars/json4s-ast_2.11-3.5.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-server-resourcemanager-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-math3-3.1.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/activation-1.1.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-server-tests-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/snappy-java-1.0.4.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/microsoft-windowsazure-storage-sdk-0.6.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/slf4j-log4j12-1.7.16.jar":"System Classpath","/opt/some/path/lib/spark2/kafka-0.9/metrics-core-2.2.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jersey-json-1.9.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-kvstore_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/some/path/lib/spark2/jars/parquet-common-1.10.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/protobuf-java-2.5.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jersey-guice-1.9.jar":"System 
Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-archives-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-archive-logs-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jetty-6.1.26.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-mapreduce-examples-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/snappy-java-1.1.7.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-server-web-proxy-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-httpclient-3.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/orc-mapreduce-1.5.2-nohive.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jsch-0.1.42.jar":"System Classpath","/opt/some/path/lib/spark2/jars/metrics-jvm-3.1.5.jar":"System Classpath","/opt/some/path/lib/spark2/jars/javax.annotation-api-1.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/pyrolite-4.13.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-jackson-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/json4s-jackson_2.11-3.5.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jersey-client-2.22.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jline-2.11.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-hdfs-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-scala_2.10-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/kafka-0.9/spark-streaming-kafka-0-8_2.11-2.2.0.cloudera1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jsp-api-2.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jaxb-api-2.2.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-server-nodemanager-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-logging-1.1.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jackson-core-asl-1.9.13.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-compiler-3.0.10.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-generator-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/parquet-format-2.4.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jackson-mapper-asl-1.9.13.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-mapreduce-client-core-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/curator-framework-2.7.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jersey-server-1.9.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-common-2.6.0-cdh5.12.0-tests.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jetty-6.1.26.cloudera.4.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/mockito-all-1.8.5.jar":"System Classpath","/opt/some/path/lib/spark2/jars/aopalliance-repackaged-2.4.0-b34.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jackson-core-2.2.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/leveldbjni-all-1.8.jar":"System 
Classpath","/opt/some/path/lib/spark2/jars/osgi-resource-locator-1.0.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jsp-api-2.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-unsafe_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/some/path/lib/spark2/jars/oro-2.0.8.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-mapreduce-client-hs-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-common-2.7.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jackson-databind-2.6.7.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-codec-1.10.jar":"System Classpath","/opt/some/path/lib/spark2/jars/xmlenc-0.52.jar":"System Classpath","/opt/some/path/lib/spark2/jars/opencsv-2.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/xbean-asm6-shaded-4.8.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/javax.inject-1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/parquet-encoding-1.10.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-mapreduce-client-common-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/scala-library-2.11.12.jar":"System Classpath","/opt/some/path/lib/spark2/jars/json4s-scalap_2.11-3.5.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/log4j-1.2.17.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jaxb-api-2.2.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/LICENSE.txt":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-common-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/netty-3.9.9.Final.jar":"System Classpath","/opt/some/path/lib/spark2/jars/json4s-core_2.11-3.5.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-yarn-api-2.7.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/httpcore-4.2.5.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jettison-1.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/zookeeper-3.4.6.jar":"System Classpath","/opt/some/path/lib/spark2/jars/metrics-core-3.1.5.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-auth-2.7.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jersey-core-1.9.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-network-shuffle_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-beanutils-core-1.8.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hk2-utils-2.4.0-b34.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-beanutils-1.9.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/chill_2.11-0.9.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jackson-core-2.6.7.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/apacheds-i18n-2.0.0-M15.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jul-to-slf4j-1.7.16.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/paranamer-2.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jersey-container-servlet-2.22.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/janino-3.0.10.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jetty-util-6.1.26.jar":"System 
Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-common-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-beanutils-core-1.8.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/py4j-0.10.7.jar":"System Classpath","/opt/some/path/lib/spark2/jars/ivy-2.4.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/apacheds-kerberos-codec-2.0.0-M15.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-lang-2.6.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-format-2.1.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-client-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/stream-2.7.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-hdfs-2.6.0-cdh5.12.0-tests.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/xml-apis-1.3.04.jar":"System Classpath","/opt/some/path/lib/spark2/kafka-0.9/kafka_2.11-0.9.0-kafka-2.0.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/metrics-core-3.0.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-server-applicationhistoryservice-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/conf/":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/guice-servlet-3.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/slf4j-api-1.7.5.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-configuration-1.6.jar":"System Classpath","/opt/some/path/lib/spark2/jars/xz-1.5.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-tools-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-yarn-server-common-2.7.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/arrow-format-0.10.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/okio-1.4.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/compress-lzf-1.0.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-mapreduce-client-jobclient-2.7.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hppc-0.7.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/stax-api-1.0-2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-yarn_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/some/path/lib/spark2/jars/api-util-1.0.0-M20.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-applications-distributedshell-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/joda-time-2.9.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-sls-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jets3t-0.9.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/curator-recipes-2.7.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/leveldbjni-all-1.8.jar":"System Classpath","/opt/some/path/lib/spark2/jars/guice-3.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-streaming-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/guava-14.0.1.jar":"System 
Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hamcrest-core-1.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/aws-java-sdk-bundle-1.11.134.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-client-2.7.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-mapreduce-client-hs-plugins-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-gridmix-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/api-util-1.0.0-M20.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/xz-1.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-pig-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jersey-guava-2.22.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/scala-compiler-2.11.12.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-sql_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-mapreduce-client-app-2.7.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/java-xmlbuilder-0.4.jar":"System Classpath","/opt/some/path/lib/spark2/jars/slf4j-api-1.7.16.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-hadoop-bundle-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-mapreduce-client-shuffle-2.7.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-pig-bundle-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-digester-1.8.jar":"System Classpath","/opt/some/path/lib/spark2/jars/metrics-json-3.1.5.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-codec-1.4.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-beanutils-1.7.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-catalyst_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/some/path/lib/spark2/jars/scala-xml_2.11-1.0.5.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-common-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/scala-parser-combinators_2.11-1.1.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jetty-util-6.1.26.cloudera.4.jar":"System Classpath","/opt/some/path/lib/spark2/jars/httpclient-4.5.6.jar":"System Classpath","/opt/some/path/lib/spark2/jars/antlr4-runtime-4.7.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-lang-2.6.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-mllib_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jersey-media-jaxb-2.22.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/api-asn1-api-1.0.0-M20.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-mapreduce-client-app-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/machinist_2.11-0.6.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-core_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spire_2.11-0.13.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jackson-xc-1.9.13.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-thrift-1.5.0-cdh5.12.0.jar":"System 
Classpath","/opt/some/path/lib/spark2/jars/htrace-core-3.1.0-incubating.jar":"System Classpath","/opt/some/path/lib/spark2/jars/macro-compat_2.11-1.1.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-annotations-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-io-2.4.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jackson-annotations-2.2.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/orc-core-1.5.2-nohive.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-net-3.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/arrow-memory-0.10.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/shapeless_2.11-2.3.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-graphx_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jackson-core-asl-1.8.8.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/okhttp-2.4.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-format-2.1.0-cdh5.12.0-sources.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/htrace-core4-4.0.1-incubating.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-datajoin-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jackson-module-paranamer-2.7.9.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-aws-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/scala-reflect-2.11.12.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-net-3.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jackson-databind-2.2.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/parquet-column-1.10.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/xmlenc-0.52.jar":"System Classpath","/opt/some/path/lib/spark2/kafka-0.9/kafka-clients-0.9.0-kafka-2.0.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-io-2.4.jar":"System Classpath","/opt/some/path/lib/spark2/jars/lz4-java-1.4.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/core-1.1.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/arrow-vector-0.10.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/javax.ws.rs-api-2.0.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-azure-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-format-2.1.0-cdh5.12.0-javadoc.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-nfs-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-yarn-client-2.7.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/breeze_2.11-0.13.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-yarn-server-common-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/httpcore-4.4.10.jar":"System Classpath","/opt/some/path/lib/spark2/jars/javax.servlet-api-3.1.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/javax.inject-2.4.0-b34.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-mapreduce-client-core-2.7.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/servlet-api-2.5.jar":"System 
Classpath","/opt/some/path/lib/spark2/jars/hadoop-yarn-common-2.7.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-math3-3.4.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/javax.inject-1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jackson-jaxrs-1.8.8.jar":"System Classpath","/opt/some/path/lib/spark2/jars/curator-recipes-2.7.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/spark-1.6.0-cdh5.12.0-yarn-shuffle.jar":"System Classpath","/opt/some/path/lib/spark2/jars/breeze-macros_2.11-0.13.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/zookeeper-3.4.5-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/httpclient-4.2.5.jar":"System Classpath","/opt/some/path/lib/spark2/jars/metrics-graphite-3.1.5.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jcl-over-slf4j-1.7.16.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-compress-1.4.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-sketch_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-network-common_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/gson-2.2.4.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-cascading-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-auth-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/orc-shims-1.5.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/log4j-1.2.17.jar":"System Classpath","/opt/some/path/lib/spark2/jars/stax-api-1.0-2.jar":"System Classpath","/opt/some/path/lib/spark2/kafka-0.9/zkclient-0.7.jar":"System Classpath","/opt/some/path/lib/spark2/jars/paranamer-2.8.jar":"System Classpath","/opt/some/path/lib/spark2/jars/apacheds-i18n-2.0.0-M15.jar":"System Classpath","/opt/some/path/lib/spark2/jars/gson-2.2.4.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-tags_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-configuration-1.6.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-hdfs-2.7.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/guice-3.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jsr305-1.3.9.jar":"System Classpath","/opt/some/path/lib/spark2/jars/curator-client-2.7.1.jar":"System Classpath","/opt/some/path/lib/spark2/conf/yarn-conf/":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/api-asn1-api-1.0.0-M20.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/azure-data-lake-store-sdk-2.1.4.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-distcp-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/junit-4.11.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-extras-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/xercesImpl-2.9.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hk2-locator-2.4.0-b34.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jasper-runtime-5.5.23.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/curator-client-2.7.1.jar":"System 
Classpath","/opt/some/path/lib/spark2/jars/avro-1.8.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-compress-1.8.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-mapreduce-client-jobclient-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jsr305-3.0.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-collections-3.2.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/guava-11.0.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/asm-3.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/avro-1.7.6-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-httpclient-3.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jersey-client-1.9.jar":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-yarn-server-web-proxy-2.7.3.jar":"System Classpath","/opt/some/path/lib/spark2/jars/zstd-jni-1.3.2-2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/commons-cli-1.2.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-scrooge_2.10-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/commons-cli-1.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spire-macros_2.11-0.13.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-ant-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/spark-launcher_2.11-2.5.0-SNAPSHOT.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-mapreduce-client-nativetask-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/xercesImpl-2.9.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jackson-module-scala_2.11-2.6.7.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-hdfs-nfs-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-encoding-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-mapreduce-client-shuffle-2.6.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-avro-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/flatbuffers-1.2.0-3f79e055.jar":"System Classpath","/opt/some/path/lib/spark2/jars/protobuf-java-2.5.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-test-hadoop2-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-mapreduce-client-jobclient-2.6.0-cdh5.12.0-tests.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/aopalliance-1.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-column-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/slf4j-log4j12-1.7.5.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/parquet-protobuf-1.5.0-cdh5.12.0.jar":"System Classpath","/opt/some/path/lib/spark2/jars/avro-ipc-1.8.2.jar":"System Classpath","/opt/some/path/lib/spark2/jars/arpack_combined_all-0.1.jar":"System Classpath","/opt/some/path/lib/spark2/jars/netty-all-4.1.17.Final.jar":"System 
Classpath","/opt/some/path/lib/spark2/jars/chill-java-0.9.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/NOTICE.txt":"System Classpath","/opt/some/path/lib/spark2/jars/hadoop-mapreduce-client-common-2.7.3.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/jackson-mapper-asl-1.8.8.jar":"System Classpath","/opt/some/path/lib/spark2/jars/jackson-annotations-2.6.7.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/curator-framework-2.7.1.jar":"System Classpath","/opt/cloudera/parcels/CDH-5.12.0-1.cdh5.12.0.p0.29/jars/hadoop-rumen-2.6.0-cdh5.12.0.jar":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"PythonBisectingKMeansExample","App ID":"application_1538416563558_0014","Timestamp":1538440959580,"User":"root"} +{"Event":"SparkListenerJobStart","Job ID":0,"Submission Time":1538440969009,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"first at BisectingKMeans.scala:163","Number of Tasks":1,"RDD Info":[{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"2\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:163","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1377)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:163)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]}],"Stage IDs":[0],"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"3\",\"name\":\"first\"}"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"first at BisectingKMeans.scala:163","Number of Tasks":1,"RDD Info":[{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"2\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:163","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1377)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:163)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440969044,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"3\",\"name\":\"first\"}"}} +{"Event":"SparkListenerExecutorAdded","Timestamp":1538440973727,"Executor ID":"1","Executor Info":{"Host":"rezamemory-2.gce.something.com","Total Cores":1,"Log Urls":{"stdout":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000002/root/stdout?start=-4096","stderr":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000002/root/stderr?start=-4096"}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1538440973735,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Port":46411},"Maximum Memory":384093388,"Timestamp":1538440973890,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1538440973735,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440977628,"Failed":false,"Killed":false,"Accumulables":[{"ID":23,"Name":"internal.metrics.input.recordsRead","Update":4,"Value":4,"Internal":true,"Count Failed Values":true},{"ID":22,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":72,"Internal":true,"Count Failed Values":true},{"ID":7,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":6,"Name":"internal.metrics.jvmGCTime","Update":208,"Value":208,"Internal":true,"Count Failed Values":true},{"ID":5,"Name":"internal.metrics.resultSize","Update":1448,"Value":1448,"Internal":true,"Count Failed Values":true},{"ID":4,"Name":"internal.metrics.executorCpuTime","Update":1105071149,"Value":1105071149,"Internal":true,"Count Failed Values":true},{"ID":3,"Name":"internal.metrics.executorRunTime","Update":2307,"Value":2307,"Internal":true,"Count Failed Values":true},{"ID":2,"Name":"internal.metrics.executorDeserializeCpuTime","Update":651096062,"Value":651096062,"Internal":true,"Count Failed Values":true},{"ID":1,"Name":"internal.metrics.executorDeserializeTime","Update":1322,"Value":1322,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":1322,"Executor Deserialize CPU Time":651096062,"Executor Run Time":2307,"Executor CPU Time":1105071149,"Result Size":1448,"JVM GC Time":208,"Result Serialization Time":1,"Memory Bytes 
Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":72,"Records Read":4},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":0,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":256071440,"JVMOffHeapMemory":92211424,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":333371,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":333371,"OffHeapUnifiedMemory":0,"DirectPoolMemory":134726,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":4926242816,"ProcessTreeJVMRSSMemory":525656064,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"1","Stage ID":0,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":182536928,"JVMOffHeapMemory":58263224,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":1086483,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":1086483,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20304,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3009855488,"ProcessTreeJVMRSSMemory":404488192,"ProcessTreePythonVMemory":626200576,"ProcessTreePythonRSSMemory":69218304,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"first at BisectingKMeans.scala:163","Number of Tasks":1,"RDD Info":[{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"2\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:163","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1377)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:163)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440969044,"Completion Time":1538440977644,"Accumulables":[{"ID":23,"Name":"internal.metrics.input.recordsRead","Value":4,"Internal":true,"Count Failed Values":true},{"ID":2,"Name":"internal.metrics.executorDeserializeCpuTime","Value":651096062,"Internal":true,"Count Failed Values":true},{"ID":5,"Name":"internal.metrics.resultSize","Value":1448,"Internal":true,"Count Failed Values":true},{"ID":22,"Name":"internal.metrics.input.bytesRead","Value":72,"Internal":true,"Count Failed Values":true},{"ID":4,"Name":"internal.metrics.executorCpuTime","Value":1105071149,"Internal":true,"Count Failed Values":true},{"ID":7,"Name":"internal.metrics.resultSerializationTime","Value":1,"Internal":true,"Count Failed Values":true},{"ID":1,"Name":"internal.metrics.executorDeserializeTime","Value":1322,"Internal":true,"Count Failed Values":true},{"ID":3,"Name":"internal.metrics.executorRunTime","Value":2307,"Internal":true,"Count Failed Values":true},{"ID":6,"Name":"internal.metrics.jvmGCTime","Value":208,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Completion Time":1538440977650,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":1,"Submission Time":1538440977784,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at BisectingKMeans.scala:170","Number of Tasks":2,"RDD Info":[{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent 
IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:370)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:170)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":10,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"14\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"ShuffledRDD","Scope":"{\"id\":\"13\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[1],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:171)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]}],"Stage IDs":[1,2],"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"15\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at BisectingKMeans.scala:170","Number of Tasks":2,"RDD Info":[{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD 
ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:370)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:170)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440977793,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"15\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":0,"Attempt":0,"Launch Time":1538440977816,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":1,"Attempt":0,"Launch Time":1538440978659,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":0,"Attempt":0,"Launch Time":1538440977816,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440978683,"Failed":false,"Killed":false,"Accumulables":[{"ID":48,"Name":"internal.metrics.input.recordsRead","Update":8,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":47,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":72,"Internal":true,"Count Failed Values":true},{"ID":46,"Name":"internal.metrics.shuffle.write.writeTime","Update":13535058,"Value":13535058,"Internal":true,"Count Failed Values":true},{"ID":45,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed 
Values":true},{"ID":44,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":178,"Value":178,"Internal":true,"Count Failed Values":true},{"ID":35,"Name":"internal.metrics.peakExecutionMemory","Update":1088,"Value":1088,"Internal":true,"Count Failed Values":true},{"ID":34,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":33,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":30,"Name":"internal.metrics.resultSize","Update":1662,"Value":1662,"Internal":true,"Count Failed Values":true},{"ID":29,"Name":"internal.metrics.executorCpuTime","Update":202227536,"Value":202227536,"Internal":true,"Count Failed Values":true},{"ID":28,"Name":"internal.metrics.executorRunTime","Update":705,"Value":705,"Internal":true,"Count Failed Values":true},{"ID":27,"Name":"internal.metrics.executorDeserializeCpuTime","Update":65694833,"Value":65694833,"Internal":true,"Count Failed Values":true},{"ID":26,"Name":"internal.metrics.executorDeserializeTime","Update":119,"Value":119,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":119,"Executor Deserialize CPU Time":65694833,"Executor Run Time":705,"Executor CPU Time":202227536,"Result Size":1662,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":178,"Shuffle Write Time":13535058,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":72,"Records Read":8},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":1,"Attempt":0,"Launch Time":1538440978659,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440978820,"Failed":false,"Killed":false,"Accumulables":[{"ID":48,"Name":"internal.metrics.input.recordsRead","Update":4,"Value":12,"Internal":true,"Count Failed Values":true},{"ID":47,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":144,"Internal":true,"Count Failed Values":true},{"ID":46,"Name":"internal.metrics.shuffle.write.writeTime","Update":289555,"Value":13824613,"Internal":true,"Count Failed Values":true},{"ID":45,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":44,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":178,"Value":356,"Internal":true,"Count Failed Values":true},{"ID":35,"Name":"internal.metrics.peakExecutionMemory","Update":1088,"Value":2176,"Internal":true,"Count Failed Values":true},{"ID":34,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":33,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":30,"Name":"internal.metrics.resultSize","Update":1662,"Value":3324,"Internal":true,"Count Failed Values":true},{"ID":29,"Name":"internal.metrics.executorCpuTime","Update":36560031,"Value":238787567,"Internal":true,"Count Failed 
Values":true},{"ID":28,"Name":"internal.metrics.executorRunTime","Update":120,"Value":825,"Internal":true,"Count Failed Values":true},{"ID":27,"Name":"internal.metrics.executorDeserializeCpuTime","Update":7042587,"Value":72737420,"Internal":true,"Count Failed Values":true},{"ID":26,"Name":"internal.metrics.executorDeserializeTime","Update":8,"Value":127,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":8,"Executor Deserialize CPU Time":7042587,"Executor Run Time":120,"Executor CPU Time":36560031,"Result Size":1662,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":178,"Shuffle Write Time":289555,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":72,"Records Read":4},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":1,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":292935952,"JVMOffHeapMemory":95141200,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":351534,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":351534,"OffHeapUnifiedMemory":0,"DirectPoolMemory":135031,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":4929392640,"ProcessTreeJVMRSSMemory":539996160,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"1","Stage ID":1,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":215586960,"JVMOffHeapMemory":60718904,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":1492038,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":1492038,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20637,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3014057984,"ProcessTreeJVMRSSMemory":422723584,"ProcessTreePythonVMemory":958914560,"ProcessTreePythonRSSMemory":106622976,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at BisectingKMeans.scala:170","Number of Tasks":2,"RDD Info":[{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use 
Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:370)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:170)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440977793,"Completion Time":1538440978821,"Accumulables":[{"ID":26,"Name":"internal.metrics.executorDeserializeTime","Value":127,"Internal":true,"Count Failed Values":true},{"ID":35,"Name":"internal.metrics.peakExecutionMemory","Value":2176,"Internal":true,"Count Failed Values":true},{"ID":44,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":356,"Internal":true,"Count Failed Values":true},{"ID":29,"Name":"internal.metrics.executorCpuTime","Value":238787567,"Internal":true,"Count Failed Values":true},{"ID":47,"Name":"internal.metrics.input.bytesRead","Value":144,"Internal":true,"Count Failed Values":true},{"ID":46,"Name":"internal.metrics.shuffle.write.writeTime","Value":13824613,"Internal":true,"Count Failed Values":true},{"ID":34,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":28,"Name":"internal.metrics.executorRunTime","Value":825,"Internal":true,"Count Failed 
Values":true},{"ID":45,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":2,"Internal":true,"Count Failed Values":true},{"ID":27,"Name":"internal.metrics.executorDeserializeCpuTime","Value":72737420,"Internal":true,"Count Failed Values":true},{"ID":48,"Name":"internal.metrics.input.recordsRead","Value":12,"Internal":true,"Count Failed Values":true},{"ID":30,"Name":"internal.metrics.resultSize","Value":3324,"Internal":true,"Count Failed Values":true},{"ID":33,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":10,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"14\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"ShuffledRDD","Scope":"{\"id\":\"13\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[1],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:171)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440978830,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"15\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":1,"Attempt":0,"Launch Time":1538440978844,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":0,"Attempt":0,"Launch Time":1538440979033,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt 
ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":1,"Attempt":0,"Launch Time":1538440978844,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440979050,"Failed":false,"Killed":false,"Accumulables":[{"ID":68,"Name":"internal.metrics.shuffle.read.recordsRead","Update":2,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":67,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":66,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":356,"Value":356,"Internal":true,"Count Failed Values":true},{"ID":65,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":64,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":63,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":2,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":62,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":60,"Name":"internal.metrics.peakExecutionMemory","Update":992,"Value":992,"Internal":true,"Count Failed Values":true},{"ID":59,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":58,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":55,"Name":"internal.metrics.resultSize","Update":1828,"Value":1828,"Internal":true,"Count Failed Values":true},{"ID":54,"Name":"internal.metrics.executorCpuTime","Update":88389028,"Value":88389028,"Internal":true,"Count Failed Values":true},{"ID":53,"Name":"internal.metrics.executorRunTime","Update":122,"Value":122,"Internal":true,"Count Failed Values":true},{"ID":52,"Name":"internal.metrics.executorDeserializeCpuTime","Update":27126551,"Value":27126551,"Internal":true,"Count Failed Values":true},{"ID":51,"Name":"internal.metrics.executorDeserializeTime","Update":45,"Value":45,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":45,"Executor Deserialize CPU Time":27126551,"Executor Run Time":122,"Executor CPU Time":88389028,"Result Size":1828,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":2,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":356,"Total Records Read":2},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":0,"Attempt":0,"Launch Time":1538440979033,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440979084,"Failed":false,"Killed":false,"Accumulables":[{"ID":68,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":2,"Internal":true,"Count Failed 
Values":true},{"ID":67,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":66,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":356,"Internal":true,"Count Failed Values":true},{"ID":65,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":64,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":63,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":62,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":60,"Name":"internal.metrics.peakExecutionMemory","Update":0,"Value":992,"Internal":true,"Count Failed Values":true},{"ID":59,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":58,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":55,"Name":"internal.metrics.resultSize","Update":1706,"Value":3534,"Internal":true,"Count Failed Values":true},{"ID":54,"Name":"internal.metrics.executorCpuTime","Update":15055355,"Value":103444383,"Internal":true,"Count Failed Values":true},{"ID":53,"Name":"internal.metrics.executorRunTime","Update":26,"Value":148,"Internal":true,"Count Failed Values":true},{"ID":52,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4722422,"Value":31848973,"Internal":true,"Count Failed Values":true},{"ID":51,"Name":"internal.metrics.executorDeserializeTime","Update":5,"Value":50,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":5,"Executor Deserialize CPU Time":4722422,"Executor Run Time":26,"Executor CPU Time":15055355,"Result Size":1706,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":2,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":303792496,"JVMOffHeapMemory":95545824,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":371127,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":371127,"OffHeapUnifiedMemory":0,"DirectPoolMemory":135031,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":4931497984,"ProcessTreeJVMRSSMemory":549777408,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"1","Stage ID":2,"Stage Attempt ID":0,"Executor 
Metrics":{"JVMHeapMemory":227393200,"JVMOffHeapMemory":61799392,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":463135,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":463135,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20637,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3016163328,"ProcessTreeJVMRSSMemory":436539392,"ProcessTreePythonVMemory":958914560,"ProcessTreePythonRSSMemory":106622976,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":10,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"14\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"ShuffledRDD","Scope":"{\"id\":\"13\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[1],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:171)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440978830,"Completion Time":1538440979086,"Accumulables":[{"ID":68,"Name":"internal.metrics.shuffle.read.recordsRead","Value":2,"Internal":true,"Count Failed Values":true},{"ID":59,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":53,"Name":"internal.metrics.executorRunTime","Value":148,"Internal":true,"Count Failed Values":true},{"ID":62,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":65,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":55,"Name":"internal.metrics.resultSize","Value":3534,"Internal":true,"Count Failed Values":true},{"ID":64,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":67,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed 
Values":true},{"ID":58,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":52,"Name":"internal.metrics.executorDeserializeCpuTime","Value":31848973,"Internal":true,"Count Failed Values":true},{"ID":60,"Name":"internal.metrics.peakExecutionMemory","Value":992,"Internal":true,"Count Failed Values":true},{"ID":54,"Name":"internal.metrics.executorCpuTime","Value":103444383,"Internal":true,"Count Failed Values":true},{"ID":63,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":2,"Internal":true,"Count Failed Values":true},{"ID":66,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":356,"Internal":true,"Count Failed Values":true},{"ID":51,"Name":"internal.metrics.executorDeserializeTime","Value":50,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Completion Time":1538440979087,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":2,"Submission Time":1538440979161,"Stage Infos":[{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":12,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"25\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[11],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":11,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"24\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent 
IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]},{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"27\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"ShuffledRDD","Scope":"{\"id\":\"26\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[3],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]}],"Stage IDs":[3,4],"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"28\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":12,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"25\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[11],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":11,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"24\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of 
Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440979163,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"28\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":0,"Attempt":0,"Launch Time":1538440979184,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":1,"Attempt":0,"Launch Time":1538440979344,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage 
ID":3,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":0,"Attempt":0,"Launch Time":1538440979184,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440979348,"Failed":false,"Killed":false,"Accumulables":[{"ID":98,"Name":"internal.metrics.input.recordsRead","Update":8,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":97,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":72,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.shuffle.write.writeTime","Update":259310,"Value":259310,"Internal":true,"Count Failed Values":true},{"ID":95,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":2,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":355,"Value":355,"Internal":true,"Count Failed Values":true},{"ID":85,"Name":"internal.metrics.peakExecutionMemory","Update":1264,"Value":1264,"Internal":true,"Count Failed Values":true},{"ID":84,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":83,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSize","Update":1662,"Value":1662,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.executorCpuTime","Update":40081727,"Value":40081727,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.executorRunTime","Update":98,"Value":98,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorDeserializeCpuTime","Update":24271689,"Value":24271689,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorDeserializeTime","Update":39,"Value":39,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":39,"Executor Deserialize CPU Time":24271689,"Executor Run Time":98,"Executor CPU Time":40081727,"Result Size":1662,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":355,"Shuffle Write Time":259310,"Shuffle Records Written":2},"Input Metrics":{"Bytes Read":72,"Records Read":8},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":1,"Attempt":0,"Launch Time":1538440979344,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440979441,"Failed":false,"Killed":false,"Accumulables":[{"ID":98,"Name":"internal.metrics.input.recordsRead","Update":4,"Value":12,"Internal":true,"Count Failed Values":true},{"ID":97,"Name":"internal.metrics.input.bytesRead","Update":36,"Value":108,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.shuffle.write.writeTime","Update":221381,"Value":480691,"Internal":true,"Count Failed 
Values":true},{"ID":95,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":178,"Value":533,"Internal":true,"Count Failed Values":true},{"ID":85,"Name":"internal.metrics.peakExecutionMemory","Update":1088,"Value":2352,"Internal":true,"Count Failed Values":true},{"ID":84,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":83,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSize","Update":1662,"Value":3324,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.executorCpuTime","Update":23089017,"Value":63170744,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.executorRunTime","Update":74,"Value":172,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3471167,"Value":27742856,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":43,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":3471167,"Executor Run Time":74,"Executor CPU Time":23089017,"Result Size":1662,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":178,"Shuffle Write Time":221381,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":36,"Records Read":4},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":3,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":318926040,"JVMOffHeapMemory":96521592,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":391718,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":391718,"OffHeapUnifiedMemory":0,"DirectPoolMemory":135031,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":4932550656,"ProcessTreeJVMRSSMemory":569753600,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"1","Stage ID":3,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":236711480,"JVMOffHeapMemory":62683008,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":483726,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":483726,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20922,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3019313152,"ProcessTreeJVMRSSMemory":445640704,"ProcessTreePythonVMemory":958914560,"ProcessTreePythonRSSMemory":106622976,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":12,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"25\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[11],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of 
Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":11,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"24\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440979163,"Completion Time":1538440979444,"Accumulables":[{"ID":83,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":95,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":3,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorDeserializeCpuTime","Value":27742856,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSize","Value":3324,"Internal":true,"Count Failed Values":true},{"ID":98,"Name":"internal.metrics.input.recordsRead","Value":12,"Internal":true,"Count Failed Values":true},{"ID":85,"Name":"internal.metrics.peakExecutionMemory","Value":2352,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":533,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorDeserializeTime","Value":43,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.executorCpuTime","Value":63170744,"Internal":true,"Count Failed Values":true},{"ID":97,"Name":"internal.metrics.input.bytesRead","Value":108,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.shuffle.write.writeTime","Value":480691,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.executorRunTime","Value":172,"Internal":true,"Count Failed Values":true},{"ID":84,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"27\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"ShuffledRDD","Scope":"{\"id\":\"26\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use 
Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[3],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440979446,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"28\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":0,"Attempt":0,"Launch Time":1538440979462,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":1,"Attempt":0,"Launch Time":1538440979527,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":0,"Attempt":0,"Launch Time":1538440979462,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440979528,"Failed":false,"Killed":false,"Accumulables":[{"ID":118,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":117,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":116,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":178,"Value":178,"Internal":true,"Count Failed Values":true},{"ID":115,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":114,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":113,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed 
Values":true},{"ID":112,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":110,"Name":"internal.metrics.peakExecutionMemory","Update":800,"Value":800,"Internal":true,"Count Failed Values":true},{"ID":109,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":108,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":105,"Name":"internal.metrics.resultSize","Update":1828,"Value":1828,"Internal":true,"Count Failed Values":true},{"ID":104,"Name":"internal.metrics.executorCpuTime","Update":17714408,"Value":17714408,"Internal":true,"Count Failed Values":true},{"ID":103,"Name":"internal.metrics.executorRunTime","Update":30,"Value":30,"Internal":true,"Count Failed Values":true},{"ID":102,"Name":"internal.metrics.executorDeserializeCpuTime","Update":12579502,"Value":12579502,"Internal":true,"Count Failed Values":true},{"ID":101,"Name":"internal.metrics.executorDeserializeTime","Update":22,"Value":22,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":22,"Executor Deserialize CPU Time":12579502,"Executor Run Time":30,"Executor CPU Time":17714408,"Result Size":1828,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":178,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":1,"Attempt":0,"Launch Time":1538440979527,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440979572,"Failed":false,"Killed":false,"Accumulables":[{"ID":118,"Name":"internal.metrics.shuffle.read.recordsRead","Update":2,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":117,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":116,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":355,"Value":533,"Internal":true,"Count Failed Values":true},{"ID":115,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":114,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":113,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":2,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":112,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":110,"Name":"internal.metrics.peakExecutionMemory","Update":992,"Value":1792,"Internal":true,"Count Failed Values":true},{"ID":109,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":108,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed 
Values":true},{"ID":105,"Name":"internal.metrics.resultSize","Update":1828,"Value":3656,"Internal":true,"Count Failed Values":true},{"ID":104,"Name":"internal.metrics.executorCpuTime","Update":16462125,"Value":34176533,"Internal":true,"Count Failed Values":true},{"ID":103,"Name":"internal.metrics.executorRunTime","Update":16,"Value":46,"Internal":true,"Count Failed Values":true},{"ID":102,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3190663,"Value":15770165,"Internal":true,"Count Failed Values":true},{"ID":101,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":26,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":3190663,"Executor Run Time":16,"Executor CPU Time":16462125,"Result Size":1828,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":2,"Fetch Wait Time":1,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":355,"Total Records Read":2},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":4,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":329919832,"JVMOffHeapMemory":96756344,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":413740,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":413740,"OffHeapUnifiedMemory":0,"DirectPoolMemory":135031,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":4935208960,"ProcessTreeJVMRSSMemory":585252864,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"1","Stage ID":4,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":242876648,"JVMOffHeapMemory":62975784,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":505748,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":505748,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20922,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3019313152,"ProcessTreeJVMRSSMemory":451244032,"ProcessTreePythonVMemory":958914560,"ProcessTreePythonRSSMemory":106622976,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"27\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"ShuffledRDD","Scope":"{\"id\":\"26\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[3],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440979446,"Completion Time":1538440979573,"Accumulables":[{"ID":101,"Name":"internal.metrics.executorDeserializeTime","Value":26,"Internal":true,"Count Failed Values":true},{"ID":110,"Name":"internal.metrics.peakExecutionMemory","Value":1792,"Internal":true,"Count Failed Values":true},{"ID":104,"Name":"internal.metrics.executorCpuTime","Value":34176533,"Internal":true,"Count Failed Values":true},{"ID":113,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":3,"Internal":true,"Count Failed Values":true},{"ID":116,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":533,"Internal":true,"Count Failed Values":true},{"ID":115,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":118,"Name":"internal.metrics.shuffle.read.recordsRead","Value":3,"Internal":true,"Count Failed Values":true},{"ID":109,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":103,"Name":"internal.metrics.executorRunTime","Value":46,"Internal":true,"Count Failed Values":true},{"ID":112,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":105,"Name":"internal.metrics.resultSize","Value":3656,"Internal":true,"Count Failed Values":true},{"ID":114,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":117,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":1,"Internal":true,"Count Failed Values":true},{"ID":108,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":102,"Name":"internal.metrics.executorDeserializeCpuTime","Value":15770165,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Completion Time":1538440979573,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":3,"Submission Time":1538440979609,"Stage Infos":[{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD 
ID":16,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"35\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"34\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]},{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"37\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"ShuffledRDD","Scope":"{\"id\":\"36\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[5],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]}],"Stage 
IDs":[5,6],"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"38\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":16,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"35\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"34\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440979619,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"38\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":0,"Attempt":0,"Launch Time":1538440979638,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":1,"Attempt":0,"Launch Time":1538440979754,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":0,"Attempt":0,"Launch Time":1538440979638,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440979756,"Failed":false,"Killed":false,"Accumulables":[{"ID":148,"Name":"internal.metrics.input.recordsRead","Update":8,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":147,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":72,"Internal":true,"Count Failed Values":true},{"ID":146,"Name":"internal.metrics.shuffle.write.writeTime","Update":272852,"Value":272852,"Internal":true,"Count Failed Values":true},{"ID":145,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":2,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":355,"Value":355,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.peakExecutionMemory","Update":1264,"Value":1264,"Internal":true,"Count Failed Values":true},{"ID":134,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":133,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed 
Values":true},{"ID":130,"Name":"internal.metrics.resultSize","Update":1662,"Value":1662,"Internal":true,"Count Failed Values":true},{"ID":129,"Name":"internal.metrics.executorCpuTime","Update":23042622,"Value":23042622,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.executorRunTime","Update":76,"Value":76,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorDeserializeCpuTime","Update":13112180,"Value":13112180,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorDeserializeTime","Update":28,"Value":28,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":28,"Executor Deserialize CPU Time":13112180,"Executor Run Time":76,"Executor CPU Time":23042622,"Result Size":1662,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":355,"Shuffle Write Time":272852,"Shuffle Records Written":2},"Input Metrics":{"Bytes Read":72,"Records Read":8},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":1,"Attempt":0,"Launch Time":1538440979754,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440979851,"Failed":false,"Killed":false,"Accumulables":[{"ID":148,"Name":"internal.metrics.input.recordsRead","Update":4,"Value":12,"Internal":true,"Count Failed Values":true},{"ID":147,"Name":"internal.metrics.input.bytesRead","Update":36,"Value":108,"Internal":true,"Count Failed Values":true},{"ID":146,"Name":"internal.metrics.shuffle.write.writeTime","Update":229882,"Value":502734,"Internal":true,"Count Failed Values":true},{"ID":145,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":178,"Value":533,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.peakExecutionMemory","Update":1088,"Value":2352,"Internal":true,"Count Failed Values":true},{"ID":134,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":133,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":130,"Name":"internal.metrics.resultSize","Update":1662,"Value":3324,"Internal":true,"Count Failed Values":true},{"ID":129,"Name":"internal.metrics.executorCpuTime","Update":22093052,"Value":45135674,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.executorRunTime","Update":81,"Value":157,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3862579,"Value":16974759,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":32,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":3862579,"Executor Run Time":81,"Executor CPU Time":22093052,"Result Size":1662,"JVM GC 
Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":178,"Shuffle Write Time":229882,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":36,"Records Read":4},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":5,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":341682304,"JVMOffHeapMemory":97514672,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":434309,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":434309,"OffHeapUnifiedMemory":0,"DirectPoolMemory":135031,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":4935254016,"ProcessTreeJVMRSSMemory":597999616,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"1","Stage ID":5,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":252029672,"JVMOffHeapMemory":63463032,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":526317,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":526317,"OffHeapUnifiedMemory":0,"DirectPoolMemory":21041,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3020365824,"ProcessTreeJVMRSSMemory":458960896,"ProcessTreePythonVMemory":958914560,"ProcessTreePythonRSSMemory":106622976,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":16,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"35\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"34\",\"name\":\"map\"}","Callsite":"map 
at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440979619,"Completion Time":1538440979852,"Accumulables":[{"ID":146,"Name":"internal.metrics.shuffle.write.writeTime","Value":502734,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.executorRunTime","Value":157,"Internal":true,"Count Failed Values":true},{"ID":134,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":133,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorDeserializeCpuTime","Value":16974759,"Internal":true,"Count Failed 
Values":true},{"ID":145,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":3,"Internal":true,"Count Failed Values":true},{"ID":130,"Name":"internal.metrics.resultSize","Value":3324,"Internal":true,"Count Failed Values":true},{"ID":148,"Name":"internal.metrics.input.recordsRead","Value":12,"Internal":true,"Count Failed Values":true},{"ID":129,"Name":"internal.metrics.executorCpuTime","Value":45135674,"Internal":true,"Count Failed Values":true},{"ID":147,"Name":"internal.metrics.input.bytesRead","Value":108,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorDeserializeTime","Value":32,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.peakExecutionMemory","Value":2352,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":533,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"37\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"ShuffledRDD","Scope":"{\"id\":\"36\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[5],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440979854,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"38\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":6,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":0,"Attempt":0,"Launch Time":1538440979869,"Executor 
ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":6,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":1,"Attempt":0,"Launch Time":1538440979920,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":6,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":0,"Attempt":0,"Launch Time":1538440979869,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440979921,"Failed":false,"Killed":false,"Accumulables":[{"ID":168,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":167,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":166,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":178,"Value":178,"Internal":true,"Count Failed Values":true},{"ID":165,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":164,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":163,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":162,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":160,"Name":"internal.metrics.peakExecutionMemory","Update":800,"Value":800,"Internal":true,"Count Failed Values":true},{"ID":159,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":158,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":155,"Name":"internal.metrics.resultSize","Update":1828,"Value":1828,"Internal":true,"Count Failed Values":true},{"ID":154,"Name":"internal.metrics.executorCpuTime","Update":15546330,"Value":15546330,"Internal":true,"Count Failed Values":true},{"ID":153,"Name":"internal.metrics.executorRunTime","Update":19,"Value":19,"Internal":true,"Count Failed Values":true},{"ID":152,"Name":"internal.metrics.executorDeserializeCpuTime","Update":11263754,"Value":11263754,"Internal":true,"Count Failed Values":true},{"ID":151,"Name":"internal.metrics.executorDeserializeTime","Update":22,"Value":22,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":22,"Executor Deserialize CPU Time":11263754,"Executor Run Time":19,"Executor CPU Time":15546330,"Result Size":1828,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":178,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":6,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":1,"Attempt":0,"Launch Time":1538440979920,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440979972,"Failed":false,"Killed":false,"Accumulables":[{"ID":168,"Name":"internal.metrics.shuffle.read.recordsRead","Update":2,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":167,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":166,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":355,"Value":533,"Internal":true,"Count Failed Values":true},{"ID":165,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":164,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":163,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":2,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":162,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":160,"Name":"internal.metrics.peakExecutionMemory","Update":992,"Value":1792,"Internal":true,"Count Failed Values":true},{"ID":159,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":158,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":157,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":155,"Name":"internal.metrics.resultSize","Update":1871,"Value":3699,"Internal":true,"Count Failed Values":true},{"ID":154,"Name":"internal.metrics.executorCpuTime","Update":15089701,"Value":30636031,"Internal":true,"Count Failed Values":true},{"ID":153,"Name":"internal.metrics.executorRunTime","Update":27,"Value":46,"Internal":true,"Count Failed Values":true},{"ID":152,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3045280,"Value":14309034,"Internal":true,"Count Failed Values":true},{"ID":151,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":25,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3045280,"Executor Run Time":27,"Executor CPU Time":15089701,"Result Size":1871,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":2,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":355,"Total Records Read":2},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":6,"Stage Attempt ID":0,"Executor 
Metrics":{"JVMHeapMemory":350990264,"JVMOffHeapMemory":97710440,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":456312,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":456312,"OffHeapUnifiedMemory":0,"DirectPoolMemory":135031,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":4932550656,"ProcessTreeJVMRSSMemory":604299264,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"37\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"ShuffledRDD","Scope":"{\"id\":\"36\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[5],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440979854,"Completion Time":1538440979973,"Accumulables":[{"ID":155,"Name":"internal.metrics.resultSize","Value":3699,"Internal":true,"Count Failed Values":true},{"ID":164,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":167,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":158,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":166,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":533,"Internal":true,"Count Failed Values":true},{"ID":151,"Name":"internal.metrics.executorDeserializeTime","Value":25,"Internal":true,"Count Failed 
Values":true},{"ID":160,"Name":"internal.metrics.peakExecutionMemory","Value":1792,"Internal":true,"Count Failed Values":true},{"ID":163,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":3,"Internal":true,"Count Failed Values":true},{"ID":154,"Name":"internal.metrics.executorCpuTime","Value":30636031,"Internal":true,"Count Failed Values":true},{"ID":157,"Name":"internal.metrics.resultSerializationTime","Value":1,"Internal":true,"Count Failed Values":true},{"ID":165,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":168,"Name":"internal.metrics.shuffle.read.recordsRead","Value":3,"Internal":true,"Count Failed Values":true},{"ID":159,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":153,"Name":"internal.metrics.executorRunTime","Value":46,"Internal":true,"Count Failed Values":true},{"ID":162,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":152,"Name":"internal.metrics.executorDeserializeCpuTime","Value":14309034,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerJobEnd","Job ID":3,"Completion Time":1538440979974,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":4,"Submission Time":1538440980008,"Stage Infos":[{"Stage ID":7,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":20,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"45\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"44\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at 
BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]},{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":22,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"47\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[21],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":21,"Name":"ShuffledRDD","Scope":"{\"id\":\"46\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[7],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]}],"Stage IDs":[7,8],"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"48\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":7,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":20,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"45\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"44\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of 
Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440980015,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"48\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":7,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":0,"Attempt":0,"Launch Time":1538440980049,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerRemoved","Block Manager ID":{"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Port":46411},"Timestamp":1538440980522} +{"Event":"SparkListenerExecutorRemoved","Timestamp":1538440980759,"Executor ID":"1","Removed Reason":"Container marked as failed: container_1538416563558_0014_01_000002 on host: rezamemory-2.gce.something.com. 
Exit status: 56. Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000002\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"} +{"Event":"SparkListenerTaskEnd","Stage ID":7,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"ExecutorLostFailure","Executor ID":"1","Exit Caused By App":true,"Loss Reason":"Container marked as failed: container_1538416563558_0014_01_000002 on host: rezamemory-2.gce.something.com. Exit status: 56. Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000002\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"},"Task Info":{"Task ID":13,"Index":0,"Attempt":0,"Launch Time":1538440980049,"Executor ID":"1","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440980757,"Failed":true,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerExecutorAdded","Timestamp":1538440986317,"Executor ID":"2","Executor Info":{"Host":"rezamemory-2.gce.something.com","Total Cores":1,"Log Urls":{"stdout":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000003/root/stdout?start=-4096","stderr":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000003/root/stderr?start=-4096"}}} +{"Event":"SparkListenerTaskStart","Stage ID":7,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":0,"Attempt":1,"Launch Time":1538440986317,"Executor ID":"2","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} 
+{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"2","Host":"rezamemory-2.gce.something.com","Port":39119},"Maximum Memory":384093388,"Timestamp":1538440986696,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerExecutorAdded","Timestamp":1538440988793,"Executor ID":"3","Executor Info":{"Host":"rezamemory-2.gce.something.com","Total Cores":1,"Log Urls":{"stdout":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000004/root/stdout?start=-4096","stderr":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000004/root/stderr?start=-4096"}}} +{"Event":"SparkListenerTaskStart","Stage ID":7,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":1,"Attempt":0,"Launch Time":1538440988793,"Executor ID":"3","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"3","Host":"rezamemory-2.gce.something.com","Port":40911},"Maximum Memory":384093388,"Timestamp":1538440989162,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerBlockManagerRemoved","Block Manager ID":{"Executor ID":"2","Host":"rezamemory-2.gce.something.com","Port":39119},"Timestamp":1538440993798} +{"Event":"SparkListenerTaskEnd","Stage ID":7,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"ExecutorLostFailure","Executor ID":"2","Exit Caused By App":true,"Loss Reason":"Container marked as failed: container_1538416563558_0014_01_000003 on host: rezamemory-2.gce.something.com. Exit status: 56. Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000003\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"},"Task Info":{"Task ID":14,"Index":0,"Attempt":1,"Launch Time":1538440986317,"Executor ID":"2","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440994010,"Failed":true,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerExecutorRemoved","Timestamp":1538440994012,"Executor ID":"2","Removed Reason":"Container marked as failed: container_1538416563558_0014_01_000003 on host: rezamemory-2.gce.something.com. Exit status: 56. 
Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000003\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"} +{"Event":"SparkListenerTaskStart","Stage ID":7,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":2,"Launch Time":1538440995449,"Executor ID":"3","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":7,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":1,"Attempt":0,"Launch Time":1538440988793,"Executor ID":"3","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440995450,"Failed":false,"Killed":false,"Accumulables":[{"ID":198,"Name":"internal.metrics.input.recordsRead","Update":4,"Value":4,"Internal":true,"Count Failed Values":true},{"ID":197,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":72,"Internal":true,"Count Failed Values":true},{"ID":196,"Name":"internal.metrics.shuffle.write.writeTime","Update":10065137,"Value":10065137,"Internal":true,"Count Failed Values":true},{"ID":195,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":194,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":178,"Value":178,"Internal":true,"Count Failed Values":true},{"ID":185,"Name":"internal.metrics.peakExecutionMemory","Update":1088,"Value":1088,"Internal":true,"Count Failed Values":true},{"ID":184,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":183,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":181,"Name":"internal.metrics.jvmGCTime","Update":360,"Value":360,"Internal":true,"Count Failed Values":true},{"ID":180,"Name":"internal.metrics.resultSize","Update":1705,"Value":1705,"Internal":true,"Count Failed Values":true},{"ID":179,"Name":"internal.metrics.executorCpuTime","Update":1406669099,"Value":1406669099,"Internal":true,"Count Failed Values":true},{"ID":178,"Name":"internal.metrics.executorRunTime","Update":4128,"Value":4128,"Internal":true,"Count Failed Values":true},{"ID":177,"Name":"internal.metrics.executorDeserializeCpuTime","Update":726605764,"Value":726605764,"Internal":true,"Count Failed Values":true},{"ID":176,"Name":"internal.metrics.executorDeserializeTime","Update":1995,"Value":1995,"Internal":true,"Count 
Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":1995,"Executor Deserialize CPU Time":726605764,"Executor Run Time":4128,"Executor CPU Time":1406669099,"Result Size":1705,"JVM GC Time":360,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":178,"Shuffle Write Time":10065137,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":72,"Records Read":4},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":7,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":2,"Launch Time":1538440995449,"Executor ID":"3","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440995696,"Failed":false,"Killed":false,"Accumulables":[{"ID":198,"Name":"internal.metrics.input.recordsRead","Update":8,"Value":12,"Internal":true,"Count Failed Values":true},{"ID":197,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":144,"Internal":true,"Count Failed Values":true},{"ID":196,"Name":"internal.metrics.shuffle.write.writeTime","Update":293846,"Value":10358983,"Internal":true,"Count Failed Values":true},{"ID":195,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":2,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":194,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":355,"Value":533,"Internal":true,"Count Failed Values":true},{"ID":185,"Name":"internal.metrics.peakExecutionMemory","Update":1264,"Value":2352,"Internal":true,"Count Failed Values":true},{"ID":184,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":183,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":180,"Name":"internal.metrics.resultSize","Update":1662,"Value":3367,"Internal":true,"Count Failed Values":true},{"ID":179,"Name":"internal.metrics.executorCpuTime","Update":91844758,"Value":1498513857,"Internal":true,"Count Failed Values":true},{"ID":178,"Name":"internal.metrics.executorRunTime","Update":220,"Value":4348,"Internal":true,"Count Failed Values":true},{"ID":177,"Name":"internal.metrics.executorDeserializeCpuTime","Update":8316162,"Value":734921926,"Internal":true,"Count Failed Values":true},{"ID":176,"Name":"internal.metrics.executorDeserializeTime","Update":9,"Value":2004,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":9,"Executor Deserialize CPU Time":8316162,"Executor Run Time":220,"Executor CPU Time":91844758,"Result Size":1662,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":355,"Shuffle Write Time":293846,"Shuffle Records Written":2},"Input Metrics":{"Bytes Read":72,"Records Read":8},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"2","Stage ID":7,"Stage Attempt 
ID":0,"Executor Metrics":{"JVMHeapMemory":201931120,"JVMOffHeapMemory":58230320,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":1094710,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":1094710,"OffHeapUnifiedMemory":0,"DirectPoolMemory":45633,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3023769600,"ProcessTreeJVMRSSMemory":410324992,"ProcessTreePythonVMemory":285470720,"ProcessTreePythonRSSMemory":30171136,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":7,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":195471784,"JVMOffHeapMemory":100867584,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":476885,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":476885,"OffHeapUnifiedMemory":0,"DirectPoolMemory":171571,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":4971368448,"ProcessTreeJVMRSSMemory":663375872,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"1","Stage ID":7,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":258718872,"JVMOffHeapMemory":63737056,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":548320,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":548320,"OffHeapUnifiedMemory":0,"DirectPoolMemory":21084,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3021418496,"ProcessTreeJVMRSSMemory":466001920,"ProcessTreePythonVMemory":958914560,"ProcessTreePythonRSSMemory":106622976,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"3","Stage ID":7,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":223684056,"JVMOffHeapMemory":60665000,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":1482102,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":1482102,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20318,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3015626752,"ProcessTreeJVMRSSMemory":404672512,"ProcessTreePythonVMemory":958963712,"ProcessTreePythonRSSMemory":106639360,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":7,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":20,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"45\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"44\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent 
IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440980015,"Completion 
Time":1538440995697,"Accumulables":[{"ID":176,"Name":"internal.metrics.executorDeserializeTime","Value":2004,"Internal":true,"Count Failed Values":true},{"ID":185,"Name":"internal.metrics.peakExecutionMemory","Value":2352,"Internal":true,"Count Failed Values":true},{"ID":194,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":533,"Internal":true,"Count Failed Values":true},{"ID":184,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":178,"Name":"internal.metrics.executorRunTime","Value":4348,"Internal":true,"Count Failed Values":true},{"ID":196,"Name":"internal.metrics.shuffle.write.writeTime","Value":10358983,"Internal":true,"Count Failed Values":true},{"ID":181,"Name":"internal.metrics.jvmGCTime","Value":360,"Internal":true,"Count Failed Values":true},{"ID":180,"Name":"internal.metrics.resultSize","Value":3367,"Internal":true,"Count Failed Values":true},{"ID":198,"Name":"internal.metrics.input.recordsRead","Value":12,"Internal":true,"Count Failed Values":true},{"ID":183,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":177,"Name":"internal.metrics.executorDeserializeCpuTime","Value":734921926,"Internal":true,"Count Failed Values":true},{"ID":195,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":3,"Internal":true,"Count Failed Values":true},{"ID":179,"Name":"internal.metrics.executorCpuTime","Value":1498513857,"Internal":true,"Count Failed Values":true},{"ID":197,"Name":"internal.metrics.input.bytesRead","Value":144,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":22,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"47\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[21],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":21,"Name":"ShuffledRDD","Scope":"{\"id\":\"46\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[7],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440995698,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"48\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1538440995710,"Executor ID":"3","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerRemoved","Block Manager ID":{"Executor ID":"3","Host":"rezamemory-2.gce.something.com","Port":40911},"Timestamp":1538440996257} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"ExecutorLostFailure","Executor ID":"3","Exit Caused By App":true,"Loss Reason":"Container marked as failed: container_1538416563558_0014_01_000004 on host: rezamemory-2.gce.something.com. Exit status: 56. Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000004\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"},"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1538440995710,"Executor ID":"3","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538440996467,"Failed":true,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerExecutorRemoved","Timestamp":1538440996468,"Executor ID":"3","Removed Reason":"Container marked as failed: container_1538416563558_0014_01_000004 on host: rezamemory-2.gce.something.com. Exit status: 56. 
Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000004\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"} +{"Event":"SparkListenerExecutorAdded","Timestamp":1538441002826,"Executor ID":"4","Executor Info":{"Host":"rezamemory-2.gce.something.com","Total Cores":1,"Log Urls":{"stdout":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000005/root/stdout?start=-4096","stderr":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000005/root/stderr?start=-4096"}}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":0,"Attempt":1,"Launch Time":1538441002828,"Executor ID":"4","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerExecutorAdded","Timestamp":1538441003031,"Executor ID":"5","Executor Info":{"Host":"rezamemory-2.gce.something.com","Total Cores":1,"Log Urls":{"stdout":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000006/root/stdout?start=-4096","stderr":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000006/root/stderr?start=-4096"}}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":1,"Attempt":0,"Launch Time":1538441003032,"Executor ID":"5","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"4","Host":"rezamemory-2.gce.something.com","Port":39248},"Maximum Memory":384093388,"Timestamp":1538441003132,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"5","Host":"rezamemory-2.gce.something.com","Port":43165},"Maximum Memory":384093388,"Timestamp":1538441003383,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":0,"Attempt":1,"Launch Time":1538441002828,"Executor ID":"4","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1538441006147,"Failed":false,"Killed":false,"Accumulables":[{"ID":218,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":217,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":216,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":215,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":214,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":178,"Value":178,"Internal":true,"Count Failed Values":true},{"ID":213,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":212,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":210,"Name":"internal.metrics.peakExecutionMemory","Update":800,"Value":800,"Internal":true,"Count Failed Values":true},{"ID":209,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":208,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":207,"Name":"internal.metrics.resultSerializationTime","Update":2,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":206,"Name":"internal.metrics.jvmGCTime","Update":350,"Value":350,"Internal":true,"Count Failed Values":true},{"ID":205,"Name":"internal.metrics.resultSize","Update":1914,"Value":1914,"Internal":true,"Count Failed Values":true},{"ID":204,"Name":"internal.metrics.executorCpuTime","Update":219243972,"Value":219243972,"Internal":true,"Count Failed Values":true},{"ID":203,"Name":"internal.metrics.executorRunTime","Update":893,"Value":893,"Internal":true,"Count Failed Values":true},{"ID":202,"Name":"internal.metrics.executorDeserializeCpuTime","Update":717217987,"Value":717217987,"Internal":true,"Count Failed Values":true},{"ID":201,"Name":"internal.metrics.executorDeserializeTime","Update":1972,"Value":1972,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":1972,"Executor Deserialize CPU Time":717217987,"Executor Run Time":893,"Executor CPU Time":219243972,"Result Size":1914,"JVM GC Time":350,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":1,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":178,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":1,"Attempt":0,"Launch Time":1538441003032,"Executor ID":"5","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441006584,"Failed":false,"Killed":false,"Accumulables":[{"ID":218,"Name":"internal.metrics.shuffle.read.recordsRead","Update":2,"Value":3,"Internal":true,"Count Failed 
Values":true},{"ID":217,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":216,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":215,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":214,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":355,"Value":533,"Internal":true,"Count Failed Values":true},{"ID":213,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":212,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":2,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":210,"Name":"internal.metrics.peakExecutionMemory","Update":992,"Value":1792,"Internal":true,"Count Failed Values":true},{"ID":209,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":208,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":207,"Name":"internal.metrics.resultSerializationTime","Update":10,"Value":12,"Internal":true,"Count Failed Values":true},{"ID":206,"Name":"internal.metrics.jvmGCTime","Update":270,"Value":620,"Internal":true,"Count Failed Values":true},{"ID":205,"Name":"internal.metrics.resultSize","Update":1914,"Value":3828,"Internal":true,"Count Failed Values":true},{"ID":204,"Name":"internal.metrics.executorCpuTime","Update":210863492,"Value":430107464,"Internal":true,"Count Failed Values":true},{"ID":203,"Name":"internal.metrics.executorRunTime","Update":412,"Value":1305,"Internal":true,"Count Failed Values":true},{"ID":202,"Name":"internal.metrics.executorDeserializeCpuTime","Update":727356712,"Value":1444574699,"Internal":true,"Count Failed Values":true},{"ID":201,"Name":"internal.metrics.executorDeserializeTime","Update":2604,"Value":4576,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":2604,"Executor Deserialize CPU Time":727356712,"Executor Run Time":412,"Executor CPU Time":210863492,"Result Size":1914,"JVM GC Time":270,"Result Serialization Time":10,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":2,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":355,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":2},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":8,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":263995432,"JVMOffHeapMemory":101978136,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":498888,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":498888,"OffHeapUnifiedMemory":0,"DirectPoolMemory":191656,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":5008089088,"ProcessTreeJVMRSSMemory":663732224,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"5","Stage ID":8,"Stage Attempt ID":0,"Executor 
Metrics":{"JVMHeapMemory":150497592,"JVMOffHeapMemory":45958576,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":22003,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":22003,"OffHeapUnifiedMemory":0,"DirectPoolMemory":3446,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":2984218624,"ProcessTreeJVMRSSMemory":325042176,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"4","Stage ID":8,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":181352744,"JVMOffHeapMemory":47061200,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":22003,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":22003,"OffHeapUnifiedMemory":0,"DirectPoolMemory":11272,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3013332992,"ProcessTreeJVMRSSMemory":416645120,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"3","Stage ID":8,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":226223752,"JVMOffHeapMemory":60840424,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":433558,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":433558,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20318,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3016937472,"ProcessTreeJVMRSSMemory":406044672,"ProcessTreePythonVMemory":958963712,"ProcessTreePythonRSSMemory":106639360,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":22,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"47\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[21],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":21,"Name":"ShuffledRDD","Scope":"{\"id\":\"46\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[7],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538440995698,"Completion Time":1538441006585,"Accumulables":[{"ID":218,"Name":"internal.metrics.shuffle.read.recordsRead","Value":3,"Internal":true,"Count Failed Values":true},{"ID":209,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":202,"Name":"internal.metrics.executorDeserializeCpuTime","Value":1444574699,"Internal":true,"Count Failed Values":true},{"ID":205,"Name":"internal.metrics.resultSize","Value":3828,"Internal":true,"Count Failed Values":true},{"ID":214,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":533,"Internal":true,"Count Failed Values":true},{"ID":217,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":208,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":216,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":207,"Name":"internal.metrics.resultSerializationTime","Value":12,"Internal":true,"Count Failed Values":true},{"ID":210,"Name":"internal.metrics.peakExecutionMemory","Value":1792,"Internal":true,"Count Failed Values":true},{"ID":201,"Name":"internal.metrics.executorDeserializeTime","Value":4576,"Internal":true,"Count Failed Values":true},{"ID":213,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":204,"Name":"internal.metrics.executorCpuTime","Value":430107464,"Internal":true,"Count Failed Values":true},{"ID":203,"Name":"internal.metrics.executorRunTime","Value":1305,"Internal":true,"Count Failed Values":true},{"ID":212,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":3,"Internal":true,"Count Failed Values":true},{"ID":215,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":206,"Name":"internal.metrics.jvmGCTime","Value":620,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerJobEnd","Job ID":4,"Completion Time":1538441006585,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":5,"Submission Time":1538441006610,"Stage Infos":[{"Stage ID":9,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":24,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"55\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[23],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":23,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"54\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number 
of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]},{"Stage ID":10,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":26,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"57\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[25],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":25,"Name":"ShuffledRDD","Scope":"{\"id\":\"56\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[24],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[9],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]}],"Stage IDs":[9,10],"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"58\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":9,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":24,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"55\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[23],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD 
ID":23,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"54\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538441006612,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"58\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":9,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":0,"Attempt":0,"Launch Time":1538441006622,"Executor ID":"4","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":9,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":1,"Attempt":0,"Launch Time":1538441006623,"Executor ID":"5","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerRemoved","Block Manager ID":{"Executor ID":"4","Host":"rezamemory-2.gce.something.com","Port":39248},"Timestamp":1538441010070} +{"Event":"SparkListenerBlockManagerRemoved","Block Manager ID":{"Executor ID":"5","Host":"rezamemory-2.gce.something.com","Port":43165},"Timestamp":1538441010233} +{"Event":"SparkListenerTaskEnd","Stage ID":9,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"ExecutorLostFailure","Executor ID":"4","Exit Caused By App":true,"Loss Reason":"Container marked as failed: container_1538416563558_0014_01_000005 on host: rezamemory-2.gce.something.com. Exit status: 56. 
Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000005\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"},"Task Info":{"Task ID":20,"Index":0,"Attempt":0,"Launch Time":1538441006622,"Executor ID":"4","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441010280,"Failed":true,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerExecutorRemoved","Timestamp":1538441010281,"Executor ID":"4","Removed Reason":"Container marked as failed: container_1538416563558_0014_01_000005 on host: rezamemory-2.gce.something.com. Exit status: 56. Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000005\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"} +{"Event":"SparkListenerTaskEnd","Stage ID":9,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"ExecutorLostFailure","Executor ID":"5","Exit Caused By App":true,"Loss Reason":"Container marked as failed: container_1538416563558_0014_01_000006 on host: rezamemory-2.gce.something.com. Exit status: 56. 
Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000006\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"},"Task Info":{"Task ID":21,"Index":1,"Attempt":0,"Launch Time":1538441006623,"Executor ID":"5","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441010484,"Failed":true,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerExecutorRemoved","Timestamp":1538441010485,"Executor ID":"5","Removed Reason":"Container marked as failed: container_1538416563558_0014_01_000006 on host: rezamemory-2.gce.something.com. Exit status: 56. Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000006\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"} +{"Event":"SparkListenerExecutorAdded","Timestamp":1538441015443,"Executor ID":"6","Executor Info":{"Host":"rezamemory-3.gce.something.com","Total Cores":1,"Log Urls":{"stdout":"http://rezamemory-3.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000007/root/stdout?start=-4096","stderr":"http://rezamemory-3.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000007/root/stderr?start=-4096"}}} +{"Event":"SparkListenerTaskStart","Stage ID":9,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":1,"Attempt":1,"Launch Time":1538441015444,"Executor ID":"6","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"6","Host":"rezamemory-3.gce.something.com","Port":45593},"Maximum 
Memory":384093388,"Timestamp":1538441015852,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerExecutorAdded","Timestamp":1538441020314,"Executor ID":"7","Executor Info":{"Host":"rezamemory-3.gce.something.com","Total Cores":1,"Log Urls":{"stdout":"http://rezamemory-3.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000008/root/stdout?start=-4096","stderr":"http://rezamemory-3.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000008/root/stderr?start=-4096"}}} +{"Event":"SparkListenerTaskStart","Stage ID":9,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":0,"Attempt":1,"Launch Time":1538441020315,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Port":40992},"Maximum Memory":384093388,"Timestamp":1538441020602,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerBlockManagerRemoved","Block Manager ID":{"Executor ID":"6","Host":"rezamemory-3.gce.something.com","Port":45593},"Timestamp":1538441022942} +{"Event":"SparkListenerTaskEnd","Stage ID":9,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"ExecutorLostFailure","Executor ID":"6","Exit Caused By App":true,"Loss Reason":"Container marked as failed: container_1538416563558_0014_01_000007 on host: rezamemory-3.gce.something.com. Exit status: 56. Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000007\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"},"Task Info":{"Task ID":22,"Index":1,"Attempt":1,"Launch Time":1538441015444,"Executor ID":"6","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441023152,"Failed":true,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerExecutorRemoved","Timestamp":1538441023153,"Executor ID":"6","Removed Reason":"Container marked as failed: container_1538416563558_0014_01_000007 on host: rezamemory-3.gce.something.com. Exit status: 56. 
Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000007\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"} +{"Event":"SparkListenerTaskStart","Stage ID":9,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":1,"Attempt":2,"Launch Time":1538441025899,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":9,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":0,"Attempt":1,"Launch Time":1538441020315,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441025900,"Failed":false,"Killed":false,"Accumulables":[{"ID":248,"Name":"internal.metrics.input.recordsRead","Update":8,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":247,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":72,"Internal":true,"Count Failed Values":true},{"ID":246,"Name":"internal.metrics.shuffle.write.writeTime","Update":3971129,"Value":3971129,"Internal":true,"Count Failed Values":true},{"ID":245,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":2,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":244,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":355,"Value":355,"Internal":true,"Count Failed Values":true},{"ID":235,"Name":"internal.metrics.peakExecutionMemory","Update":1264,"Value":1264,"Internal":true,"Count Failed Values":true},{"ID":234,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":233,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":231,"Name":"internal.metrics.jvmGCTime","Update":244,"Value":244,"Internal":true,"Count Failed Values":true},{"ID":230,"Name":"internal.metrics.resultSize","Update":1705,"Value":1705,"Internal":true,"Count Failed Values":true},{"ID":229,"Name":"internal.metrics.executorCpuTime","Update":1268816374,"Value":1268816374,"Internal":true,"Count Failed Values":true},{"ID":228,"Name":"internal.metrics.executorRunTime","Update":2978,"Value":2978,"Internal":true,"Count Failed Values":true},{"ID":227,"Name":"internal.metrics.executorDeserializeCpuTime","Update":714859741,"Value":714859741,"Internal":true,"Count Failed Values":true},{"ID":226,"Name":"internal.metrics.executorDeserializeTime","Update":2106,"Value":2106,"Internal":true,"Count 
Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":2106,"Executor Deserialize CPU Time":714859741,"Executor Run Time":2978,"Executor CPU Time":1268816374,"Result Size":1705,"JVM GC Time":244,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":355,"Shuffle Write Time":3971129,"Shuffle Records Written":2},"Input Metrics":{"Bytes Read":72,"Records Read":8},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":9,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":1,"Attempt":2,"Launch Time":1538441025899,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441026136,"Failed":false,"Killed":false,"Accumulables":[{"ID":248,"Name":"internal.metrics.input.recordsRead","Update":4,"Value":12,"Internal":true,"Count Failed Values":true},{"ID":247,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":144,"Internal":true,"Count Failed Values":true},{"ID":246,"Name":"internal.metrics.shuffle.write.writeTime","Update":265841,"Value":4236970,"Internal":true,"Count Failed Values":true},{"ID":245,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":244,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":178,"Value":533,"Internal":true,"Count Failed Values":true},{"ID":235,"Name":"internal.metrics.peakExecutionMemory","Update":1088,"Value":2352,"Internal":true,"Count Failed Values":true},{"ID":234,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":233,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":232,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":230,"Name":"internal.metrics.resultSize","Update":1705,"Value":3410,"Internal":true,"Count Failed Values":true},{"ID":229,"Name":"internal.metrics.executorCpuTime","Update":88980290,"Value":1357796664,"Internal":true,"Count Failed Values":true},{"ID":228,"Name":"internal.metrics.executorRunTime","Update":201,"Value":3179,"Internal":true,"Count Failed Values":true},{"ID":227,"Name":"internal.metrics.executorDeserializeCpuTime","Update":8550572,"Value":723410313,"Internal":true,"Count Failed Values":true},{"ID":226,"Name":"internal.metrics.executorDeserializeTime","Update":13,"Value":2119,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":13,"Executor Deserialize CPU Time":8550572,"Executor Run Time":201,"Executor CPU Time":88980290,"Result Size":1705,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":178,"Shuffle Write Time":265841,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":72,"Records Read":4},"Output Metrics":{"Bytes Written":0,"Records 
Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":9,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":332727504,"JVMOffHeapMemory":103237664,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":519462,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":519462,"OffHeapUnifiedMemory":0,"DirectPoolMemory":228406,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":5011247104,"ProcessTreeJVMRSSMemory":658915328,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"5","Stage ID":9,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":184519808,"JVMOffHeapMemory":58341088,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":1116714,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":1116714,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20420,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":2998673408,"ProcessTreeJVMRSSMemory":378527744,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"7","Stage ID":9,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":218694008,"JVMOffHeapMemory":60757008,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":1482103,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":1482103,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20668,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3020120064,"ProcessTreeJVMRSSMemory":423698432,"ProcessTreePythonVMemory":958894080,"ProcessTreePythonRSSMemory":106696704,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"4","Stage ID":9,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":220189424,"JVMOffHeapMemory":59534504,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":1116714,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":1116714,"OffHeapUnifiedMemory":0,"DirectPoolMemory":27895,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3024392192,"ProcessTreeJVMRSSMemory":431939584,"ProcessTreePythonVMemory":283738112,"ProcessTreePythonRSSMemory":27226112,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"6","Stage ID":9,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":208356192,"JVMOffHeapMemory":58297728,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":1094711,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":1094711,"OffHeapUnifiedMemory":0,"DirectPoolMemory":27296,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3027820544,"ProcessTreeJVMRSSMemory":439750656,"ProcessTreePythonVMemory":286220288,"ProcessTreePythonRSSMemory":30846976,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":9,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":24,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"55\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[23],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD 
ID":23,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"54\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538441006612,"Completion Time":1538441026137,"Accumulables":[{"ID":227,"Name":"internal.metrics.executorDeserializeCpuTime","Value":723410313,"Internal":true,"Count Failed Values":true},{"ID":245,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":3,"Internal":true,"Count Failed Values":true},{"ID":226,"Name":"internal.metrics.executorDeserializeTime","Value":2119,"Internal":true,"Count Failed Values":true},{"ID":235,"Name":"internal.metrics.peakExecutionMemory","Value":2352,"Internal":true,"Count Failed Values":true},{"ID":244,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":533,"Internal":true,"Count Failed Values":true},{"ID":229,"Name":"internal.metrics.executorCpuTime","Value":1357796664,"Internal":true,"Count Failed Values":true},{"ID":247,"Name":"internal.metrics.input.bytesRead","Value":144,"Internal":true,"Count Failed Values":true},{"ID":232,"Name":"internal.metrics.resultSerializationTime","Value":1,"Internal":true,"Count Failed Values":true},{"ID":234,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":228,"Name":"internal.metrics.executorRunTime","Value":3179,"Internal":true,"Count Failed Values":true},{"ID":246,"Name":"internal.metrics.shuffle.write.writeTime","Value":4236970,"Internal":true,"Count Failed Values":true},{"ID":231,"Name":"internal.metrics.jvmGCTime","Value":244,"Internal":true,"Count Failed Values":true},{"ID":230,"Name":"internal.metrics.resultSize","Value":3410,"Internal":true,"Count Failed Values":true},{"ID":248,"Name":"internal.metrics.input.recordsRead","Value":12,"Internal":true,"Count Failed Values":true},{"ID":233,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":10,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":26,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"57\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[25],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD 
ID":25,"Name":"ShuffledRDD","Scope":"{\"id\":\"56\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[24],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[9],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538441026138,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"58\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":10,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1538441026147,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":10,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1538441026309,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":10,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1538441026147,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441026311,"Failed":false,"Killed":false,"Accumulables":[{"ID":268,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":267,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":266,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":178,"Value":178,"Internal":true,"Count Failed Values":true},{"ID":265,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed 
Values":true},{"ID":264,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":263,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":262,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":260,"Name":"internal.metrics.peakExecutionMemory","Update":800,"Value":800,"Internal":true,"Count Failed Values":true},{"ID":259,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":258,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":255,"Name":"internal.metrics.resultSize","Update":1828,"Value":1828,"Internal":true,"Count Failed Values":true},{"ID":254,"Name":"internal.metrics.executorCpuTime","Update":80311930,"Value":80311930,"Internal":true,"Count Failed Values":true},{"ID":253,"Name":"internal.metrics.executorRunTime","Update":89,"Value":89,"Internal":true,"Count Failed Values":true},{"ID":252,"Name":"internal.metrics.executorDeserializeCpuTime","Update":29610969,"Value":29610969,"Internal":true,"Count Failed Values":true},{"ID":251,"Name":"internal.metrics.executorDeserializeTime","Update":62,"Value":62,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":62,"Executor Deserialize CPU Time":29610969,"Executor Run Time":89,"Executor CPU Time":80311930,"Result Size":1828,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":178,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":10,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1538441026309,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441026375,"Failed":false,"Killed":false,"Accumulables":[{"ID":268,"Name":"internal.metrics.shuffle.read.recordsRead","Update":2,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":267,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":266,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":355,"Value":533,"Internal":true,"Count Failed Values":true},{"ID":265,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":264,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":263,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":2,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":262,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":260,"Name":"internal.metrics.peakExecutionMemory","Update":992,"Value":1792,"Internal":true,"Count Failed 
Values":true},{"ID":259,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":258,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":255,"Name":"internal.metrics.resultSize","Update":1828,"Value":3656,"Internal":true,"Count Failed Values":true},{"ID":254,"Name":"internal.metrics.executorCpuTime","Update":18625831,"Value":98937761,"Internal":true,"Count Failed Values":true},{"ID":253,"Name":"internal.metrics.executorRunTime","Update":38,"Value":127,"Internal":true,"Count Failed Values":true},{"ID":252,"Name":"internal.metrics.executorDeserializeCpuTime","Update":6238101,"Value":35849070,"Internal":true,"Count Failed Values":true},{"ID":251,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":68,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":6238101,"Executor Run Time":38,"Executor CPU Time":18625831,"Result Size":1828,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":2,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":355,"Total Records Read":2},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":10,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":341644736,"JVMOffHeapMemory":103378144,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":541469,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":541469,"OffHeapUnifiedMemory":0,"DirectPoolMemory":228406,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":5011247104,"ProcessTreeJVMRSSMemory":658989056,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"7","Stage ID":10,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":228132872,"JVMOffHeapMemory":61634808,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":455614,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":455614,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20669,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3021172736,"ProcessTreeJVMRSSMemory":436867072,"ProcessTreePythonVMemory":958894080,"ProcessTreePythonRSSMemory":106696704,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":10,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":26,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"57\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[25],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":25,"Name":"ShuffledRDD","Scope":"{\"id\":\"56\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[24],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached 
Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[9],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538441026138,"Completion Time":1538441026376,"Accumulables":[{"ID":254,"Name":"internal.metrics.executorCpuTime","Value":98937761,"Internal":true,"Count Failed Values":true},{"ID":262,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":253,"Name":"internal.metrics.executorRunTime","Value":127,"Internal":true,"Count Failed Values":true},{"ID":265,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":259,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":268,"Name":"internal.metrics.shuffle.read.recordsRead","Value":3,"Internal":true,"Count Failed Values":true},{"ID":267,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":258,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":252,"Name":"internal.metrics.executorDeserializeCpuTime","Value":35849070,"Internal":true,"Count Failed Values":true},{"ID":255,"Name":"internal.metrics.resultSize","Value":3656,"Internal":true,"Count Failed Values":true},{"ID":264,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":263,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":3,"Internal":true,"Count Failed Values":true},{"ID":266,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":533,"Internal":true,"Count Failed Values":true},{"ID":260,"Name":"internal.metrics.peakExecutionMemory","Value":1792,"Internal":true,"Count Failed Values":true},{"ID":251,"Name":"internal.metrics.executorDeserializeTime","Value":68,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerJobEnd","Job ID":5,"Completion Time":1538441026376,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":6,"Submission Time":1538441026404,"Stage Infos":[{"Stage ID":12,"Stage Attempt ID":0,"Stage Name":"collect at 
BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":30,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"67\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[29],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":29,"Name":"ShuffledRDD","Scope":"{\"id\":\"66\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[28],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[11],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]},{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":28,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"65\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[27],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":27,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"64\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached 
Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]}],"Stage IDs":[12,11],"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"68\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of 
Tasks":2,"RDD Info":[{"RDD ID":28,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"65\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[27],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":27,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"64\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538441026408,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"68\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":11,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":0,"Attempt":0,"Launch Time":1538441026450,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":11,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":1,"Attempt":0,"Launch Time":1538441026585,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":11,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":0,"Attempt":0,"Launch Time":1538441026450,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441026586,"Failed":false,"Killed":false,"Accumulables":[{"ID":298,"Name":"internal.metrics.input.recordsRead","Update":8,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":297,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":72,"Internal":true,"Count Failed Values":true},{"ID":296,"Name":"internal.metrics.shuffle.write.writeTime","Update":278446,"Value":278446,"Internal":true,"Count Failed Values":true},{"ID":295,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":2,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":294,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":355,"Value":355,"Internal":true,"Count Failed Values":true},{"ID":285,"Name":"internal.metrics.peakExecutionMemory","Update":1264,"Value":1264,"Internal":true,"Count Failed Values":true},{"ID":284,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":283,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed 
Values":true},{"ID":280,"Name":"internal.metrics.resultSize","Update":1662,"Value":1662,"Internal":true,"Count Failed Values":true},{"ID":279,"Name":"internal.metrics.executorCpuTime","Update":23317154,"Value":23317154,"Internal":true,"Count Failed Values":true},{"ID":278,"Name":"internal.metrics.executorRunTime","Update":69,"Value":69,"Internal":true,"Count Failed Values":true},{"ID":277,"Name":"internal.metrics.executorDeserializeCpuTime","Update":17832528,"Value":17832528,"Internal":true,"Count Failed Values":true},{"ID":276,"Name":"internal.metrics.executorDeserializeTime","Update":53,"Value":53,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":53,"Executor Deserialize CPU Time":17832528,"Executor Run Time":69,"Executor CPU Time":23317154,"Result Size":1662,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":355,"Shuffle Write Time":278446,"Shuffle Records Written":2},"Input Metrics":{"Bytes Read":72,"Records Read":8},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":11,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":1,"Attempt":0,"Launch Time":1538441026585,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441026700,"Failed":false,"Killed":false,"Accumulables":[{"ID":298,"Name":"internal.metrics.input.recordsRead","Update":4,"Value":12,"Internal":true,"Count Failed Values":true},{"ID":297,"Name":"internal.metrics.input.bytesRead","Update":36,"Value":108,"Internal":true,"Count Failed Values":true},{"ID":296,"Name":"internal.metrics.shuffle.write.writeTime","Update":215244,"Value":493690,"Internal":true,"Count Failed Values":true},{"ID":295,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":294,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":178,"Value":533,"Internal":true,"Count Failed Values":true},{"ID":285,"Name":"internal.metrics.peakExecutionMemory","Update":1088,"Value":2352,"Internal":true,"Count Failed Values":true},{"ID":284,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":283,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":280,"Name":"internal.metrics.resultSize","Update":1662,"Value":3324,"Internal":true,"Count Failed Values":true},{"ID":279,"Name":"internal.metrics.executorCpuTime","Update":23292541,"Value":46609695,"Internal":true,"Count Failed Values":true},{"ID":278,"Name":"internal.metrics.executorRunTime","Update":94,"Value":163,"Internal":true,"Count Failed Values":true},{"ID":277,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4400590,"Value":22233118,"Internal":true,"Count Failed Values":true},{"ID":276,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":57,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4400590,"Executor Run Time":94,"Executor CPU Time":23292541,"Result Size":1662,"JVM GC 
Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":178,"Shuffle Write Time":215244,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":36,"Records Read":4},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":11,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":198912952,"JVMOffHeapMemory":104016864,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":554933,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":554933,"OffHeapUnifiedMemory":0,"DirectPoolMemory":228407,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":5040721920,"ProcessTreeJVMRSSMemory":705302528,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"7","Stage ID":11,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":249428840,"JVMOffHeapMemory":62917480,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":455614,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":455614,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20911,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3035901952,"ProcessTreeJVMRSSMemory":447041536,"ProcessTreePythonVMemory":958894080,"ProcessTreePythonRSSMemory":106696704,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"filter at BisectingKMeans.scala:213","Number of Tasks":2,"RDD Info":[{"RDD ID":28,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"65\",\"name\":\"filter\"}","Callsite":"filter at BisectingKMeans.scala:213","Parent IDs":[27],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":27,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"64\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:372","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"12\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:170","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"11\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:169","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD 
ID":2,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"ZippedPartitionsRDD2","Scope":"{\"id\":\"10\",\"name\":\"zip\"}","Callsite":"zip at BisectingKMeans.scala:169","Parent IDs":[3,5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"map\"}","Callsite":"map at BisectingKMeans.scala:168","Parent IDs":[3],"Storage Level":{"Use Disk":true,"Use Memory":true,"Deserialized":true,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.RDD.filter(RDD.scala:387)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:213)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538441026408,"Completion Time":1538441026701,"Accumulables":[{"ID":295,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":3,"Internal":true,"Count Failed Values":true},{"ID":298,"Name":"internal.metrics.input.recordsRead","Value":12,"Internal":true,"Count Failed Values":true},{"ID":280,"Name":"internal.metrics.resultSize","Value":3324,"Internal":true,"Count Failed Values":true},{"ID":283,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":277,"Name":"internal.metrics.executorDeserializeCpuTime","Value":22233118,"Internal":true,"Count Failed 
Values":true},{"ID":294,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":533,"Internal":true,"Count Failed Values":true},{"ID":276,"Name":"internal.metrics.executorDeserializeTime","Value":57,"Internal":true,"Count Failed Values":true},{"ID":285,"Name":"internal.metrics.peakExecutionMemory","Value":2352,"Internal":true,"Count Failed Values":true},{"ID":279,"Name":"internal.metrics.executorCpuTime","Value":46609695,"Internal":true,"Count Failed Values":true},{"ID":297,"Name":"internal.metrics.input.bytesRead","Value":108,"Internal":true,"Count Failed Values":true},{"ID":284,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":278,"Name":"internal.metrics.executorRunTime","Value":163,"Internal":true,"Count Failed Values":true},{"ID":296,"Name":"internal.metrics.shuffle.write.writeTime","Value":493690,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":12,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":30,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"67\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[29],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":29,"Name":"ShuffledRDD","Scope":"{\"id\":\"66\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[28],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[11],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538441026702,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"68\",\"name\":\"collect\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":12,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":0,"Attempt":0,"Launch Time":1538441026714,"Executor 
ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":12,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":1,"Attempt":0,"Launch Time":1538441026794,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":12,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":0,"Attempt":0,"Launch Time":1538441026714,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441026795,"Failed":false,"Killed":false,"Accumulables":[{"ID":318,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":178,"Value":178,"Internal":true,"Count Failed Values":true},{"ID":315,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":314,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":313,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":312,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":310,"Name":"internal.metrics.peakExecutionMemory","Update":800,"Value":800,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":305,"Name":"internal.metrics.resultSize","Update":1871,"Value":1871,"Internal":true,"Count Failed Values":true},{"ID":304,"Name":"internal.metrics.executorCpuTime","Update":16951615,"Value":16951615,"Internal":true,"Count Failed Values":true},{"ID":303,"Name":"internal.metrics.executorRunTime","Update":28,"Value":28,"Internal":true,"Count Failed Values":true},{"ID":302,"Name":"internal.metrics.executorDeserializeCpuTime","Update":12613041,"Value":12613041,"Internal":true,"Count Failed Values":true},{"ID":301,"Name":"internal.metrics.executorDeserializeTime","Update":31,"Value":31,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":31,"Executor Deserialize CPU Time":12613041,"Executor Run Time":28,"Executor CPU Time":16951615,"Result Size":1871,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":178,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes 
Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":12,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":1,"Attempt":0,"Launch Time":1538441026794,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441026839,"Failed":false,"Killed":false,"Accumulables":[{"ID":318,"Name":"internal.metrics.shuffle.read.recordsRead","Update":2,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":355,"Value":533,"Internal":true,"Count Failed Values":true},{"ID":315,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":314,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":313,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":2,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":312,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":310,"Name":"internal.metrics.peakExecutionMemory","Update":992,"Value":1792,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":305,"Name":"internal.metrics.resultSize","Update":1871,"Value":3742,"Internal":true,"Count Failed Values":true},{"ID":304,"Name":"internal.metrics.executorCpuTime","Update":17828037,"Value":34779652,"Internal":true,"Count Failed Values":true},{"ID":303,"Name":"internal.metrics.executorRunTime","Update":24,"Value":52,"Internal":true,"Count Failed Values":true},{"ID":302,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3879530,"Value":16492571,"Internal":true,"Count Failed Values":true},{"ID":301,"Name":"internal.metrics.executorDeserializeTime","Update":5,"Value":36,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":5,"Executor Deserialize CPU Time":3879530,"Executor Run Time":24,"Executor CPU Time":17828037,"Result Size":1871,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":2,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":355,"Total Records Read":2},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":12,"Stage Attempt ID":0,"Executor 
Metrics":{"JVMHeapMemory":204287872,"JVMOffHeapMemory":104055736,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":519458,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":519458,"OffHeapUnifiedMemory":0,"DirectPoolMemory":228407,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":5047037952,"ProcessTreeJVMRSSMemory":708661248,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"7","Stage ID":12,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":252161344,"JVMOffHeapMemory":63019944,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":441078,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":441078,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20911,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3038007296,"ProcessTreeJVMRSSMemory":451837952,"ProcessTreePythonVMemory":958894080,"ProcessTreePythonRSSMemory":106696704,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":12,"Stage Attempt ID":0,"Stage Name":"collect at BisectingKMeans.scala:304","Number of Tasks":2,"RDD Info":[{"RDD ID":30,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"67\",\"name\":\"mapValues\"}","Callsite":"mapValues at BisectingKMeans.scala:303","Parent IDs":[29],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":29,"Name":"ShuffledRDD","Scope":"{\"id\":\"66\",\"name\":\"aggregateByKey\"}","Callsite":"aggregateByKey at BisectingKMeans.scala:300","Parent IDs":[28],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[11],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:944)\norg.apache.spark.mllib.clustering.BisectingKMeans$.org$apache$spark$mllib$clustering$BisectingKMeans$$summarize(BisectingKMeans.scala:304)\norg.apache.spark.mllib.clustering.BisectingKMeans$$anonfun$run$1.apply$mcVI$sp(BisectingKMeans.scala:216)\nscala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:210)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:255)\norg.apache.spark.mllib.clustering.BisectingKMeans.run(BisectingKMeans.scala:261)\norg.apache.spark.mllib.api.python.PythonMLLibAPI.trainBisectingKMeans(PythonMLLibAPI.scala:135)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538441026702,"Completion Time":1538441026840,"Accumulables":[{"ID":304,"Name":"internal.metrics.executorCpuTime","Value":34779652,"Internal":true,"Count Failed 
Values":true},{"ID":313,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":3,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.resultSerializationTime","Value":2,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":533,"Internal":true,"Count Failed Values":true},{"ID":301,"Name":"internal.metrics.executorDeserializeTime","Value":36,"Internal":true,"Count Failed Values":true},{"ID":310,"Name":"internal.metrics.peakExecutionMemory","Value":1792,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.recordsRead","Value":3,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":303,"Name":"internal.metrics.executorRunTime","Value":52,"Internal":true,"Count Failed Values":true},{"ID":312,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":315,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":302,"Name":"internal.metrics.executorDeserializeCpuTime","Value":16492571,"Internal":true,"Count Failed Values":true},{"ID":314,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":305,"Name":"internal.metrics.resultSize","Value":3742,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerJobEnd","Job ID":6,"Completion Time":1538441026840,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerUnpersistRDD","RDD ID":32} +{"Event":"SparkListenerUnpersistRDD","RDD ID":5} +{"Event":"SparkListenerJobStart","Job ID":7,"Submission Time":1538441026935,"Stage Infos":[{"Stage ID":13,"Stage Attempt ID":0,"Stage Name":"sum at BisectingKMeansModel.scala:101","Number of Tasks":2,"RDD Info":[{"RDD ID":36,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"78\",\"name\":\"map\"}","Callsite":"map at BisectingKMeansModel.scala:101","Parent IDs":[35],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":35,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"77\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[34],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":34,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD 
ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:34)\norg.apache.spark.mllib.clustering.BisectingKMeansModel.computeCost(BisectingKMeansModel.scala:101)\norg.apache.spark.mllib.clustering.BisectingKMeansModel.computeCost(BisectingKMeansModel.scala:108)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Accumulables":[]}],"Stage IDs":[13],"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"79\",\"name\":\"sum\"}"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":13,"Stage Attempt ID":0,"Stage Name":"sum at BisectingKMeansModel.scala:101","Number of Tasks":2,"RDD Info":[{"RDD ID":36,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"78\",\"name\":\"map\"}","Callsite":"map at BisectingKMeansModel.scala:101","Parent IDs":[35],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":35,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"77\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[34],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":34,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:34)\norg.apache.spark.mllib.clustering.BisectingKMeansModel.computeCost(BisectingKMeansModel.scala:101)\norg.apache.spark.mllib.clustering.BisectingKMeansModel.computeCost(BisectingKMeansModel.scala:108)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538441026936,"Accumulables":[]},"Properties":{"spark.rdd.scope.noOverride":"true","spark.rdd.scope":"{\"id\":\"79\",\"name\":\"sum\"}"}} +{"Event":"SparkListenerTaskStart","Stage ID":13,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":0,"Attempt":0,"Launch Time":1538441026947,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerRemoved","Block Manager ID":{"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Port":40992},"Timestamp":1538441027285} +{"Event":"SparkListenerTaskEnd","Stage ID":13,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"ExecutorLostFailure","Executor ID":"7","Exit Caused By App":true,"Loss Reason":"Container marked as failed: container_1538416563558_0014_01_000008 on host: rezamemory-3.gce.something.com. Exit status: 56. Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000008\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"},"Task Info":{"Task ID":31,"Index":0,"Attempt":0,"Launch Time":1538441026947,"Executor ID":"7","Host":"rezamemory-3.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441027494,"Failed":true,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerExecutorRemoved","Timestamp":1538441027495,"Executor ID":"7","Removed Reason":"Container marked as failed: container_1538416563558_0014_01_000008 on host: rezamemory-3.gce.something.com. Exit status: 56. 
Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000008\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"} +{"Event":"SparkListenerExecutorAdded","Timestamp":1538441032740,"Executor ID":"8","Executor Info":{"Host":"rezamemory-2.gce.something.com","Total Cores":1,"Log Urls":{"stdout":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000009/root/stdout?start=-4096","stderr":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000009/root/stderr?start=-4096"}}} +{"Event":"SparkListenerTaskStart","Stage ID":13,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":0,"Attempt":1,"Launch Time":1538441032741,"Executor ID":"8","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"8","Host":"rezamemory-2.gce.something.com","Port":41485},"Maximum Memory":384093388,"Timestamp":1538441033142,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerExecutorAdded","Timestamp":1538441036142,"Executor ID":"9","Executor Info":{"Host":"rezamemory-2.gce.something.com","Total Cores":1,"Log Urls":{"stdout":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000010/root/stdout?start=-4096","stderr":"http://rezamemory-2.gce.something.com:8042/node/containerlogs/container_1538416563558_0014_01_000010/root/stderr?start=-4096"}}} +{"Event":"SparkListenerTaskStart","Stage ID":13,"Stage Attempt ID":0,"Task Info":{"Task ID":33,"Index":1,"Attempt":0,"Launch Time":1538441036144,"Executor ID":"9","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"9","Host":"rezamemory-2.gce.something.com","Port":40797},"Maximum Memory":384093388,"Timestamp":1538441036560,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerBlockManagerRemoved","Block Manager ID":{"Executor ID":"8","Host":"rezamemory-2.gce.something.com","Port":41485},"Timestamp":1538441040323} +{"Event":"SparkListenerTaskEnd","Stage ID":13,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"ExecutorLostFailure","Executor ID":"8","Exit Caused By App":true,"Loss Reason":"Container marked as failed: container_1538416563558_0014_01_000009 on 
host: rezamemory-2.gce.something.com. Exit status: 56. Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000009\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"},"Task Info":{"Task ID":32,"Index":0,"Attempt":1,"Launch Time":1538441032741,"Executor ID":"8","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441040533,"Failed":true,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerExecutorRemoved","Timestamp":1538441040534,"Executor ID":"8","Removed Reason":"Container marked as failed: container_1538416563558_0014_01_000009 on host: rezamemory-2.gce.something.com. Exit status: 56. Diagnostics: Exception from container-launch.\nContainer id: container_1538416563558_0014_01_000009\nExit code: 56\nStack trace: ExitCodeException exitCode=56: \n\tat org.apache.hadoop.util.Shell.runCommand(Shell.java:601)\n\tat org.apache.hadoop.util.Shell.run(Shell.java:504)\n\tat org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:786)\n\tat org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:213)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)\n\tat org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\n\nContainer exited with a non-zero exit code 56\n"} +{"Event":"SparkListenerTaskStart","Stage ID":13,"Stage Attempt ID":0,"Task Info":{"Task ID":34,"Index":0,"Attempt":2,"Launch Time":1538441042184,"Executor ID":"9","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":13,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":33,"Index":1,"Attempt":0,"Launch Time":1538441036144,"Executor ID":"9","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441042185,"Failed":false,"Killed":false,"Accumulables":[{"ID":348,"Name":"internal.metrics.input.recordsRead","Update":2,"Value":2,"Internal":true,"Count Failed 
Values":true},{"ID":347,"Name":"internal.metrics.input.bytesRead","Update":36,"Value":36,"Internal":true,"Count Failed Values":true},{"ID":334,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":333,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":332,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":331,"Name":"internal.metrics.jvmGCTime","Update":288,"Value":288,"Internal":true,"Count Failed Values":true},{"ID":330,"Name":"internal.metrics.resultSize","Update":1539,"Value":1539,"Internal":true,"Count Failed Values":true},{"ID":329,"Name":"internal.metrics.executorCpuTime","Update":1278640624,"Value":1278640624,"Internal":true,"Count Failed Values":true},{"ID":328,"Name":"internal.metrics.executorRunTime","Update":2796,"Value":2796,"Internal":true,"Count Failed Values":true},{"ID":327,"Name":"internal.metrics.executorDeserializeCpuTime","Update":720112530,"Value":720112530,"Internal":true,"Count Failed Values":true},{"ID":326,"Name":"internal.metrics.executorDeserializeTime","Update":2587,"Value":2587,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":2587,"Executor Deserialize CPU Time":720112530,"Executor Run Time":2796,"Executor CPU Time":1278640624,"Result Size":1539,"JVM GC Time":288,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":36,"Records Read":2},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":13,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":34,"Index":0,"Attempt":2,"Launch Time":1538441042184,"Executor ID":"9","Host":"rezamemory-2.gce.something.com","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1538441042334,"Failed":false,"Killed":false,"Accumulables":[{"ID":348,"Name":"internal.metrics.input.recordsRead","Update":4,"Value":6,"Internal":true,"Count Failed Values":true},{"ID":347,"Name":"internal.metrics.input.bytesRead","Update":72,"Value":108,"Internal":true,"Count Failed Values":true},{"ID":334,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":333,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":330,"Name":"internal.metrics.resultSize","Update":1453,"Value":2992,"Internal":true,"Count Failed Values":true},{"ID":329,"Name":"internal.metrics.executorCpuTime","Update":69678739,"Value":1348319363,"Internal":true,"Count Failed Values":true},{"ID":328,"Name":"internal.metrics.executorRunTime","Update":118,"Value":2914,"Internal":true,"Count Failed Values":true},{"ID":327,"Name":"internal.metrics.executorDeserializeCpuTime","Update":6252896,"Value":726365426,"Internal":true,"Count Failed Values":true},{"ID":326,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":2593,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU 
Time":6252896,"Executor Run Time":118,"Executor CPU Time":69678739,"Result Size":1453,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":72,"Records Read":4},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"driver","Stage ID":13,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":266240264,"JVMOffHeapMemory":104976128,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":534126,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":534126,"OffHeapUnifiedMemory":0,"DirectPoolMemory":228407,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":5067235328,"ProcessTreeJVMRSSMemory":710475776,"ProcessTreePythonVMemory":408375296,"ProcessTreePythonRSSMemory":40284160,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"8","Stage ID":13,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":197860072,"JVMOffHeapMemory":57762424,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":1088805,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":1088805,"OffHeapUnifiedMemory":0,"DirectPoolMemory":25453,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3028791296,"ProcessTreeJVMRSSMemory":430297088,"ProcessTreePythonVMemory":286212096,"ProcessTreePythonRSSMemory":30441472,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageExecutorMetrics","Executor ID":"9","Stage ID":13,"Stage Attempt ID":0,"Executor Metrics":{"JVMHeapMemory":193766856,"JVMOffHeapMemory":59006656,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":1088805,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":1088805,"OffHeapUnifiedMemory":0,"DirectPoolMemory":20181,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":3016261632,"ProcessTreeJVMRSSMemory":405860352,"ProcessTreePythonVMemory":625926144,"ProcessTreePythonRSSMemory":69013504,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":13,"Stage Attempt ID":0,"Stage Name":"sum at BisectingKMeansModel.scala:101","Number of Tasks":2,"RDD Info":[{"RDD ID":36,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"78\",\"name\":\"map\"}","Callsite":"map at BisectingKMeansModel.scala:101","Parent IDs":[35],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":35,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"77\",\"name\":\"mapPartitions\"}","Callsite":"mapPartitions at PythonMLLibAPI.scala:1346","Parent IDs":[34],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":34,"Name":"PythonRDD","Callsite":"RDD at PythonRDD.scala:53","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD 
ID":0,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"data/mllib/kmeans_data.txt","Scope":"{\"id\":\"0\",\"name\":\"textFile\"}","Callsite":"textFile at NativeMethodAccessorImpl.java:0","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:34)\norg.apache.spark.mllib.clustering.BisectingKMeansModel.computeCost(BisectingKMeansModel.scala:101)\norg.apache.spark.mllib.clustering.BisectingKMeansModel.computeCost(BisectingKMeansModel.scala:108)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\npy4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\npy4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\npy4j.Gateway.invoke(Gateway.java:282)\npy4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\npy4j.commands.CallCommand.execute(CallCommand.java:79)\npy4j.GatewayConnection.run(GatewayConnection.java:238)\njava.lang.Thread.run(Thread.java:745)","Submission Time":1538441026936,"Completion Time":1538441042335,"Accumulables":[{"ID":331,"Name":"internal.metrics.jvmGCTime","Value":288,"Internal":true,"Count Failed Values":true},{"ID":334,"Name":"internal.metrics.diskBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":328,"Name":"internal.metrics.executorRunTime","Value":2914,"Internal":true,"Count Failed Values":true},{"ID":327,"Name":"internal.metrics.executorDeserializeCpuTime","Value":726365426,"Internal":true,"Count Failed Values":true},{"ID":348,"Name":"internal.metrics.input.recordsRead","Value":6,"Internal":true,"Count Failed Values":true},{"ID":330,"Name":"internal.metrics.resultSize","Value":2992,"Internal":true,"Count Failed Values":true},{"ID":333,"Name":"internal.metrics.memoryBytesSpilled","Value":0,"Internal":true,"Count Failed Values":true},{"ID":332,"Name":"internal.metrics.resultSerializationTime","Value":1,"Internal":true,"Count Failed Values":true},{"ID":326,"Name":"internal.metrics.executorDeserializeTime","Value":2593,"Internal":true,"Count Failed Values":true},{"ID":347,"Name":"internal.metrics.input.bytesRead","Value":108,"Internal":true,"Count Failed Values":true},{"ID":329,"Name":"internal.metrics.executorCpuTime","Value":1348319363,"Internal":true,"Count Failed Values":true}]}} +{"Event":"SparkListenerJobEnd","Job ID":7,"Completion Time":1538441042335,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1538441042338} diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 7c9f8aba17f3..2a2d013bacbd 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -83,6 +83,7 @@ class 
HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers .set("spark.testing", "true") .set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) .set("spark.eventLog.logStageExecutorMetrics.enabled", "true") + .set("spark.eventLog.logStageExecutorProcessTreeMetrics.enabled", "true") conf.setAll(extraConf) provider = new FsHistoryProvider(conf) provider.checkForLogs() @@ -131,6 +132,8 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers "executor list json" -> "applications/local-1422981780767/executors", "executor list with executor metrics json" -> "applications/application_1506645932520_24630151/executors", + "executor list with executor process tree metrics json" -> + "applications/application_1538416563558_0014/executors", "stage list json" -> "applications/local-1422981780767/stages", "complete stage list json" -> "applications/local-1422981780767/stages?status=complete", "failed stage list json" -> "applications/local-1422981780767/stages?status=failed", diff --git a/core/src/test/scala/org/apache/spark/executor/ProcfsMetricsGetterSuite.scala b/core/src/test/scala/org/apache/spark/executor/ProcfsMetricsGetterSuite.scala new file mode 100644 index 000000000000..9ed1497db5e1 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/executor/ProcfsMetricsGetterSuite.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.executor + +import org.apache.spark.SparkFunSuite + + +class ProcfsMetricsGetterSuite extends SparkFunSuite { + + val p = new ProcfsMetricsGetter(getTestResourcePath("ProcfsMetrics")) + + test("testGetProcessInfo") { + var r = ProcfsMetrics(0, 0, 0, 0, 0, 0) + r = p.addProcfsMetricsFromOneProcess(r, 26109) + assert(r.jvmVmemTotal == 4769947648L) + assert(r.jvmRSSTotal == 262610944) + assert(r.pythonVmemTotal == 0) + assert(r.pythonRSSTotal == 0) + + r = p.addProcfsMetricsFromOneProcess(r, 22763) + assert(r.pythonVmemTotal == 360595456) + assert(r.pythonRSSTotal == 7831552) + assert(r.jvmVmemTotal == 4769947648L) + assert(r.jvmRSSTotal == 262610944) + } +} diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index cecd6996df7b..0c04a93646d7 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -282,53 +282,67 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit // receive 3 metric updates from each executor with just stage 0 running, // with different peak updates for each executor createExecutorMetricsUpdateEvent(1, - new ExecutorMetrics(Array(4000L, 50L, 20L, 0L, 40L, 0L, 60L, 0L, 70L, 20L))), + new ExecutorMetrics(Array(4000L, 50L, 20L, 0L, 40L, 0L, 60L, 0L, 70L, 20L, 7500L, 3500L, + 6500L, 2500L, 5500L, 1500L))), createExecutorMetricsUpdateEvent(2, - new ExecutorMetrics(Array(1500L, 50L, 20L, 0L, 0L, 0L, 20L, 0L, 70L, 0L))), - // exec 1: new stage 0 peaks for metrics at indexes: 2, 4, 6 + new ExecutorMetrics(Array(1500L, 50L, 20L, 0L, 0L, 0L, 20L, 0L, 70L, 0L, 8500L, 3500L, + 7500L, 2500L, 6500L, 1500L))), + // exec 1: new stage 0 peaks for metrics at indexes: 2, 4, 6 createExecutorMetricsUpdateEvent(1, - new ExecutorMetrics(Array(4000L, 50L, 50L, 0L, 50L, 0L, 100L, 0L, 70L, 20L))), + new ExecutorMetrics(Array(4000L, 50L, 50L, 0L, 50L, 0L, 100L, 0L, 70L, 20L, 8000L, 4000L, + 7000L, 3000L, 6000L, 2000L))), // exec 2: new stage 0 peaks for metrics at indexes: 0, 4, 6 createExecutorMetricsUpdateEvent(2, - new ExecutorMetrics(Array(2000L, 50L, 10L, 0L, 10L, 0L, 30L, 0L, 70L, 0L))), + new ExecutorMetrics(Array(2000L, 50L, 10L, 0L, 10L, 0L, 30L, 0L, 70L, 0L, 9000L, 4000L, + 8000L, 3000L, 7000L, 2000L))), // exec 1: new stage 0 peaks for metrics at indexes: 5, 7 createExecutorMetricsUpdateEvent(1, - new ExecutorMetrics(Array(2000L, 40L, 50L, 0L, 40L, 10L, 90L, 10L, 50L, 0L))), + new ExecutorMetrics(Array(2000L, 40L, 50L, 0L, 40L, 10L, 90L, 10L, 50L, 0L, 8000L, 3500L, + 7000L, 2500L, 6000L, 1500L))), // exec 2: new stage 0 peaks for metrics at indexes: 0, 5, 6, 7, 8 createExecutorMetricsUpdateEvent(2, - new ExecutorMetrics(Array(3500L, 50L, 15L, 0L, 10L, 10L, 35L, 10L, 80L, 0L))), + new ExecutorMetrics(Array(3500L, 50L, 15L, 0L, 10L, 10L, 35L, 10L, 80L, 0L, 8500L, 3500L, + 7500L, 2500L, 6500L, 1500L))), // now start stage 1, one more metric update for each executor, and new // peaks for some stage 1 metrics (as listed), initialize stage 1 peaks createStageSubmittedEvent(1), // exec 1: new stage 0 peaks for metrics at indexes: 0, 3, 7; initialize stage 1 peaks createExecutorMetricsUpdateEvent(1, - new ExecutorMetrics(Array(5000L, 30L, 50L, 20L, 30L, 10L, 80L, 30L, 50L, 0L))), - // exec 2: new stage 0 peaks for metrics at indexes: 0, 1, 2, 3, 6, 7, 9; + new ExecutorMetrics(Array(5000L, 30L, 
50L, 20L, 30L, 10L, 80L, 30L, 50L, + 0L, 5000L, 3000L, 4000L, 2000L, 3000L, 1000L))), + // exec 2: new stage 0 peaks for metrics at indexes: 0, 1, 3, 6, 7, 9; // initialize stage 1 peaks createExecutorMetricsUpdateEvent(2, - new ExecutorMetrics(Array(7000L, 70L, 50L, 20L, 0L, 10L, 50L, 30L, 10L, 40L))), + new ExecutorMetrics(Array(7000L, 70L, 50L, 20L, 0L, 10L, 50L, 30L, 10L, + 40L, 8000L, 4000L, 7000L, 3000L, 6000L, 2000L))), // complete stage 0, and 3 more updates for each executor with just // stage 1 running createStageCompletedEvent(0), // exec 1: new stage 1 peaks for metrics at indexes: 0, 1, 3 createExecutorMetricsUpdateEvent(1, - new ExecutorMetrics(Array(6000L, 70L, 20L, 30L, 10L, 0L, 30L, 30L, 30L, 0L))), - // enew ExecutorMetrics(xec 2: new stage 1 peaks for metrics at indexes: 3, 4, 7, 8 + new ExecutorMetrics(Array(6000L, 70L, 20L, 30L, 10L, 0L, 30L, 30L, 30L, 0L, 5000L, 3000L, + 4000L, 2000L, 3000L, 1000L))), + // exec 2: new stage 1 peaks for metrics at indexes: 3, 4, 7, 8 createExecutorMetricsUpdateEvent(2, - new ExecutorMetrics(Array(5500L, 30L, 20L, 40L, 10L, 0L, 30L, 40L, 40L, 20L))), + new ExecutorMetrics(Array(5500L, 30L, 20L, 40L, 10L, 0L, 30L, 40L, 40L, + 20L, 8000L, 5000L, 7000L, 4000L, 6000L, 3000L, 5000L, 2000L))), // exec 1: new stage 1 peaks for metrics at indexes: 0, 4, 5, 7 createExecutorMetricsUpdateEvent(1, - new ExecutorMetrics(Array(7000L, 70L, 5L, 25L, 60L, 30L, 65L, 55L, 30L, 0L))), + new ExecutorMetrics(Array(7000L, 70L, 5L, 25L, 60L, 30L, 65L, 55L, 30L, 0L, 3000L, 2500L, + 2000L, 1500L, 1000L, 500L))), // exec 2: new stage 1 peak for metrics at index: 7 createExecutorMetricsUpdateEvent(2, - new ExecutorMetrics(Array(5500L, 40L, 25L, 30L, 10L, 30L, 35L, 60L, 0L, 20L))), + new ExecutorMetrics(Array(5500L, 40L, 25L, 30L, 10L, 30L, 35L, 60L, 0L, + 20L, 7000L, 3000L, 6000L, 2000L, 5000L, 1000L))), // exec 1: no new stage 1 peaks createExecutorMetricsUpdateEvent(1, - new ExecutorMetrics(Array(5500L, 70L, 15L, 20L, 55L, 20L, 70L, 40L, 20L, 0L))), + new ExecutorMetrics(Array(5500L, 70L, 15L, 20L, 55L, 20L, 70L, 40L, 20L, + 0L, 4000L, 2500L, 3000L, 1500L, 2000L, 500L))), createExecutorRemovedEvent(1), // exec 2: new stage 1 peak for metrics at index: 6 createExecutorMetricsUpdateEvent(2, - new ExecutorMetrics(Array(4000L, 20L, 25L, 30L, 10L, 30L, 35L, 60L, 0L, 0L))), + new ExecutorMetrics(Array(4000L, 20L, 25L, 30L, 10L, 30L, 35L, 60L, 0L, 0L, 7000L, + 4000L, 6000L, 3000L, 5000L, 2000L))), createStageCompletedEvent(1), SparkListenerApplicationEnd(1000L)) @@ -342,20 +356,23 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit // expected StageExecutorMetrics, for the given stage id and executor id val expectedMetricsEvents: Map[(Int, String), SparkListenerStageExecutorMetrics] = - Map( - ((0, "1"), - new SparkListenerStageExecutorMetrics("1", 0, 0, - new ExecutorMetrics(Array(5000L, 50L, 50L, 20L, 50L, 10L, 100L, 30L, 70L, 20L)))), - ((0, "2"), - new SparkListenerStageExecutorMetrics("2", 0, 0, - new ExecutorMetrics(Array(7000L, 70L, 50L, 20L, 10L, 10L, 50L, 30L, 80L, 40L)))), - ((1, "1"), - new SparkListenerStageExecutorMetrics("1", 1, 0, - new ExecutorMetrics(Array(7000L, 70L, 50L, 30L, 60L, 30L, 80L, 55L, 50L, 0L)))), - ((1, "2"), - new SparkListenerStageExecutorMetrics("2", 1, 0, - new ExecutorMetrics(Array(7000L, 70L, 50L, 40L, 10L, 30L, 50L, 60L, 40L, 40L))))) - + Map( + ((0, "1"), + new SparkListenerStageExecutorMetrics("1", 0, 0, + new ExecutorMetrics(Array(5000L, 50L, 50L, 20L, 50L, 10L, 100L, 30L, + 70L, 20L, 8000L, 
4000L, 7000L, 3000L, 6000L, 2000L)))), + ((0, "2"), + new SparkListenerStageExecutorMetrics("2", 0, 0, + new ExecutorMetrics(Array(7000L, 70L, 50L, 20L, 10L, 10L, 50L, 30L, + 80L, 40L, 9000L, 4000L, 8000L, 3000L, 7000L, 2000L)))), + ((1, "1"), + new SparkListenerStageExecutorMetrics("1", 1, 0, + new ExecutorMetrics(Array(7000L, 70L, 50L, 30L, 60L, 30L, 80L, 55L, + 50L, 0L, 5000L, 3000L, 4000L, 2000L, 3000L, 1000L)))), + ((1, "2"), + new SparkListenerStageExecutorMetrics("2", 1, 0, + new ExecutorMetrics(Array(7000L, 70L, 50L, 40L, 10L, 30L, 50L, 60L, + 40L, 40L, 8000L, 5000L, 7000L, 4000L, 6000L, 3000L))))) // Verify the log file contains the expected events. // Posted events should be logged, except for ExecutorMetricsUpdate events -- these // are consolidated, and the peak values for each stage are logged at stage end. @@ -456,9 +473,9 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit assert(executorMetrics.execId === expectedMetrics.execId) assert(executorMetrics.stageId === expectedMetrics.stageId) assert(executorMetrics.stageAttemptId === expectedMetrics.stageAttemptId) - ExecutorMetricType.values.foreach { metricType => - assert(executorMetrics.executorMetrics.getMetricValue(metricType) === - expectedMetrics.executorMetrics.getMetricValue(metricType)) + ExecutorMetricType.metricToOffset.foreach { metric => + assert(executorMetrics.executorMetrics.getMetricValue(metric._1) === + expectedMetrics.executorMetrics.getMetricValue(metric._1)) } case None => assert(false) diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala index 61fec8c1d0e4..71eeb0480245 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala @@ -1367,58 +1367,74 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { // receive 3 metric updates from each executor with just stage 0 running, // with different peak updates for each executor listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(1, - Array(4000L, 50L, 20L, 0L, 40L, 0L, 60L, 0L, 70L, 20L))) + Array(4000L, 50L, 20L, 0L, 40L, 0L, 60L, 0L, 70L, 20L, 7500L, 3500L, + 6500L, 2500L, 5500L, 1500L))) listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(2, - Array(1500L, 50L, 20L, 0L, 0L, 0L, 20L, 0L, 70L, 0L))) + Array(1500L, 50L, 20L, 0L, 0L, 0L, 20L, 0L, 70L, 0L, 8500L, 3500L, + 7500L, 2500L, 6500L, 1500L))) // exec 1: new stage 0 peaks for metrics at indexes: 2, 4, 6 listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(1, - Array(4000L, 50L, 50L, 0L, 50L, 0L, 100L, 0L, 70L, 20L))) + Array(4000L, 50L, 50L, 0L, 50L, 0L, 100L, 0L, 70L, 20L, 8000L, 4000L, + 7000L, 3000L, 6000L, 2000L))) // exec 2: new stage 0 peaks for metrics at indexes: 0, 4, 6 listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(2, - Array(2000L, 50L, 10L, 0L, 10L, 0L, 30L, 0L, 70L, 0L))) + Array(2000L, 50L, 10L, 0L, 10L, 0L, 30L, 0L, 70L, 0L, 9000L, 4000L, + 8000L, 3000L, 7000L, 2000L))) // exec 1: new stage 0 peaks for metrics at indexes: 5, 7 listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(1, - Array(2000L, 40L, 50L, 0L, 40L, 10L, 90L, 10L, 50L, 0L))) + Array(2000L, 40L, 50L, 0L, 40L, 10L, 90L, 10L, 50L, 0L, 8000L, 3500L, + 7000L, 2500L, 6000L, 1500L))) // exec 2: new stage 0 peaks for metrics at indexes: 0, 5, 6, 7, 8 
listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(2, - Array(3500L, 50L, 15L, 0L, 10L, 10L, 35L, 10L, 80L, 0L))) + Array(3500L, 50L, 15L, 0L, 10L, 10L, 35L, 10L, 80L, 0L, 8500L, 3500L, + 7500L, 2500L, 6500L, 1500L))) // now start stage 1, one more metric update for each executor, and new // peaks for some stage 1 metrics (as listed), initialize stage 1 peaks listener.onStageSubmitted(createStageSubmittedEvent(1)) // exec 1: new stage 0 peaks for metrics at indexes: 0, 3, 7 listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(1, - Array(5000L, 30L, 50L, 20L, 30L, 10L, 80L, 30L, 50L, 0L))) + Array(5000L, 30L, 50L, 20L, 30L, 10L, 80L, 30L, 50L, 0L, 5000L, 3000L, + 4000L, 2000L, 3000L, 1000L))) // exec 2: new stage 0 peaks for metrics at indexes: 0, 1, 2, 3, 6, 7, 9 listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(2, - Array(7000L, 80L, 50L, 20L, 0L, 10L, 50L, 30L, 10L, 40L))) + Array(7000L, 80L, 50L, 20L, 0L, 10L, 50L, 30L, 10L, 40L, 8000L, 4000L, + 7000L, 3000L, 6000L, 2000L))) // complete stage 0, and 3 more updates for each executor with just // stage 1 running listener.onStageCompleted(createStageCompletedEvent(0)) // exec 1: new stage 1 peaks for metrics at indexes: 0, 1, 3 listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(1, - Array(6000L, 70L, 20L, 30L, 10L, 0L, 30L, 30L, 30L, 0L))) + Array(6000L, 70L, 20L, 30L, 10L, 0L, 30L, 30L, 30L, 0L, 5000L, 3000L, + 4000L, 2000L, 3000L, 1000L))) // exec 2: new stage 1 peaks for metrics at indexes: 3, 4, 7, 8 listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(2, - Array(5500L, 30L, 20L, 40L, 10L, 0L, 30L, 40L, 40L, 20L))) + Array(5500L, 30L, 20L, 40L, 10L, 0L, 30L, 40L, 40L, 20L, 8000L, 5000L, + 7000L, 4000L, 6000L, 3000L))) // exec 1: new stage 1 peaks for metrics at indexes: 0, 4, 5, 7 listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(1, - Array(7000L, 70L, 5L, 25L, 60L, 30L, 65L, 55L, 30L, 0L))) + Array(7000L, 70L, 5L, 25L, 60L, 30L, 65L, 55L, 30L, 0L, 3000L, 2500L, 2000L, + 1500L, 1000L, 500L))) // exec 2: new stage 1 peak for metrics at index: 7 listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(2, - Array(5500L, 40L, 25L, 30L, 10L, 30L, 35L, 60L, 0L, 20L))) + Array(5500L, 40L, 25L, 30L, 10L, 30L, 35L, 60L, 0L, 20L, 7000L, 3000L, + 6000L, 2000L, 5000L, 1000L))) // exec 1: no new stage 1 peaks listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(1, - Array(5500L, 70L, 15L, 20L, 55L, 20L, 70L, 40L, 20L, 0L))) + Array(5500L, 70L, 15L, 20L, 55L, 20L, 70L, 40L, 20L, 0L, 4000L, 2500L, + 3000L, 1500, 2000L, 500L))) listener.onExecutorRemoved(createExecutorRemovedEvent(1)) // exec 2: new stage 1 peak for metrics at index: 6 listener.onExecutorMetricsUpdate(createExecutorMetricsUpdateEvent(2, - Array(4000L, 20L, 25L, 30L, 10L, 30L, 35L, 60L, 0L, 0L))) + Array(4000L, 20L, 25L, 30L, 10L, 30L, 35L, 60L, 0L, 0L, 7000L, 4000L, 6000L, + 3000L, 5000L, 2000L))) listener.onStageCompleted(createStageCompletedEvent(1)) // expected peak values for each executor val expectedValues = Map( - "1" -> new ExecutorMetrics(Array(7000L, 70L, 50L, 30L, 60L, 30L, 100L, 55L, 70L, 20L)), - "2" -> new ExecutorMetrics(Array(7000L, 80L, 50L, 40L, 10L, 30L, 50L, 60L, 80L, 40L))) + "1" -> new ExecutorMetrics(Array(7000L, 70L, 50L, 30L, 60L, 30L, 100L, 55L, + 70L, 20L, 8000L, 4000L, 7000L, 3000L, 6000L, 2000L)), + "2" -> new ExecutorMetrics(Array(7000L, 80L, 50L, 40L, 10L, 30L, 50L, 60L, + 80L, 40L, 9000L, 5000L, 8000L, 4000L, 7000L, 3000L))) // check that the 
stored peak values match the expected values expectedValues.foreach { case (id, metrics) => @@ -1426,8 +1442,8 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(exec.info.id === id) exec.info.peakMemoryMetrics match { case Some(actual) => - ExecutorMetricType.values.foreach { metricType => - assert(actual.getMetricValue(metricType) === metrics.getMetricValue(metricType)) + ExecutorMetricType.metricToOffset.foreach { metric => + assert(actual.getMetricValue(metric._1) === metrics.getMetricValue(metric._1)) } case _ => assert(false) @@ -1446,23 +1462,29 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { listener.onStageSubmitted(createStageSubmittedEvent(0)) listener.onStageSubmitted(createStageSubmittedEvent(1)) listener.onStageExecutorMetrics(SparkListenerStageExecutorMetrics("1", 0, 0, - new ExecutorMetrics(Array(5000L, 50L, 50L, 20L, 50L, 10L, 100L, 30L, 70L, 20L)))) + new ExecutorMetrics(Array(5000L, 50L, 50L, 20L, 50L, 10L, 100L, 30L, + 70L, 20L, 8000L, 4000L, 7000L, 3000L, 6000L, 2000L)))) listener.onStageExecutorMetrics(SparkListenerStageExecutorMetrics("2", 0, 0, - new ExecutorMetrics(Array(7000L, 70L, 50L, 20L, 10L, 10L, 50L, 30L, 80L, 40L)))) + new ExecutorMetrics(Array(7000L, 70L, 50L, 20L, 10L, 10L, 50L, 30L, 80L, 40L, 9000L, + 4000L, 8000L, 3000L, 7000L, 2000L)))) listener.onStageCompleted(createStageCompletedEvent(0)) // executor 1 is removed before stage 1 has finished, the stage executor metrics // are logged afterwards and should still be used to update the executor metrics. listener.onExecutorRemoved(createExecutorRemovedEvent(1)) listener.onStageExecutorMetrics(SparkListenerStageExecutorMetrics("1", 1, 0, - new ExecutorMetrics(Array(7000L, 70L, 50L, 30L, 60L, 30L, 80L, 55L, 50L, 0L)))) + new ExecutorMetrics(Array(7000L, 70L, 50L, 30L, 60L, 30L, 80L, 55L, 50L, 0L, 5000L, 3000L, + 4000L, 2000L, 3000L, 1000L)))) listener.onStageExecutorMetrics(SparkListenerStageExecutorMetrics("2", 1, 0, - new ExecutorMetrics(Array(7000L, 80L, 50L, 40L, 10L, 30L, 50L, 60L, 40L, 40L)))) + new ExecutorMetrics(Array(7000L, 80L, 50L, 40L, 10L, 30L, 50L, 60L, 40L, 40L, 8000L, 5000L, + 7000L, 4000L, 6000L, 3000L)))) listener.onStageCompleted(createStageCompletedEvent(1)) // expected peak values for each executor val expectedValues = Map( - "1" -> new ExecutorMetrics(Array(7000L, 70L, 50L, 30L, 60L, 30L, 100L, 55L, 70L, 20L)), - "2" -> new ExecutorMetrics(Array(7000L, 80L, 50L, 40L, 10L, 30L, 50L, 60L, 80L, 40L))) + "1" -> new ExecutorMetrics(Array(7000L, 70L, 50L, 30L, 60L, 30L, 100L, 55L, + 70L, 20L, 8000L, 4000L, 7000L, 3000L, 6000L, 2000L)), + "2" -> new ExecutorMetrics(Array(7000L, 80L, 50L, 40L, 10L, 30L, 50L, 60L, + 80L, 40L, 9000L, 5000L, 8000L, 4000L, 7000L, 3000L))) // check that the stored peak values match the expected values for ((id, metrics) <- expectedValues) { @@ -1470,8 +1492,8 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(exec.info.id === id) exec.info.peakMemoryMetrics match { case Some(actual) => - ExecutorMetricType.values.foreach { metricType => - assert(actual.getMetricValue(metricType) === metrics.getMetricValue(metricType)) + ExecutorMetricType.metricToOffset.foreach { metric => + assert(actual.getMetricValue(metric._1) === metrics.getMetricValue(metric._1)) } case _ => assert(false) diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 1e0d2af9a471..303ca7cb8801 100644 --- 
a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -96,7 +96,8 @@ class JsonProtocolSuite extends SparkFunSuite { .accumulators().map(AccumulatorSuite.makeInfo) .zipWithIndex.map { case (a, i) => a.copy(id = i) } val executorUpdates = new ExecutorMetrics( - Array(543L, 123456L, 12345L, 1234L, 123L, 12L, 432L, 321L, 654L, 765L)) + Array(543L, 123456L, 12345L, 1234L, 123L, 12L, 432L, + 321L, 654L, 765L, 256912L, 123456L, 123456L, 61728L, 30364L, 15182L)) SparkListenerExecutorMetricsUpdate("exec3", Seq((1L, 2, 3, accumUpdates)), Some(executorUpdates)) } @@ -105,8 +106,8 @@ class JsonProtocolSuite extends SparkFunSuite { "In your multitude...", 300), RDDBlockId(0, 0), StorageLevel.MEMORY_ONLY, 100L, 0L)) val stageExecutorMetrics = SparkListenerStageExecutorMetrics("1", 2, 3, - new ExecutorMetrics(Array(543L, 123456L, 12345L, 1234L, 123L, 12L, 432L, 321L, 654L, 765L))) - + new ExecutorMetrics(Array(543L, 123456L, 12345L, 1234L, 123L, 12L, 432L, + 321L, 654L, 765L, 256912L, 123456L, 123456L, 61728L, 30364L, 15182L))) testEvent(stageSubmitted, stageSubmittedJsonString) testEvent(stageCompleted, stageCompletedJsonString) testEvent(taskStart, taskStartJsonString) @@ -440,14 +441,14 @@ class JsonProtocolSuite extends SparkFunSuite { test("executorMetricsFromJson backward compatibility: handle missing metrics") { // any missing metrics should be set to 0 - val executorMetrics = new ExecutorMetrics( - Array(12L, 23L, 45L, 67L, 78L, 89L, 90L, 123L, 456L, 789L)) + val executorMetrics = new ExecutorMetrics(Array(12L, 23L, 45L, 67L, 78L, 89L, + 90L, 123L, 456L, 789L, 40L, 20L, 20L, 10L, 20L, 10L)) val oldExecutorMetricsJson = JsonProtocol.executorMetricsToJson(executorMetrics) .removeField( _._1 == "MappedPoolMemory") - val expectedExecutorMetrics = new ExecutorMetrics( - Array(12L, 23L, 45L, 67L, 78L, 89L, 90L, 123L, 456L, 0L)) - assertEquals(expectedExecutorMetrics, + val exepectedExecutorMetrics = new ExecutorMetrics(Array(12L, 23L, 45L, 67L, + 78L, 89L, 90L, 123L, 456L, 0L, 40L, 20L, 20L, 10L, 20L, 10L)) + assertEquals(exepectedExecutorMetrics, JsonProtocol.executorMetricsFromJson(oldExecutorMetricsJson)) } @@ -753,9 +754,9 @@ private[spark] object JsonProtocolSuite extends Assertions { assertStackTraceElementEquals) } - private def assertEquals(metrics1: ExecutorMetrics, metrics2: ExecutorMetrics) { - ExecutorMetricType.values.foreach { metricType => - assert(metrics1.getMetricValue(metricType) === metrics2.getMetricValue(metricType)) + private def assertEquals(metrics1: ExecutorMetrics, metrics2: ExecutorMetrics): Unit = { + ExecutorMetricType.metricToOffset.foreach { metric => + assert(metrics1.getMetricValue(metric._1) === metrics2.getMetricValue(metric._1)) } } @@ -872,13 +873,14 @@ private[spark] object JsonProtocolSuite extends Assertions { if (includeTaskMetrics) { Seq((1L, 1, 1, Seq(makeAccumulableInfo(1, false, false, None), makeAccumulableInfo(2, false, false, None)))) - } else { + } else { Seq() } val executorMetricsUpdate = if (includeExecutorMetrics) { - Some(new ExecutorMetrics(Array(123456L, 543L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L))) - } else { + Some(new ExecutorMetrics(Array(123456L, 543L, 0L, 0L, 0L, 0L, 0L, + 0L, 0L, 0L, 256912L, 123456L, 123456L, 61728L, 30364L, 15182L))) + } else { None } SparkListenerExecutorMetricsUpdate(execId, taskMetrics, executorMetricsUpdate) @@ -2082,7 +2084,13 @@ private[spark] object JsonProtocolSuite extends Assertions { | "OnHeapUnifiedMemory" : 432, | 
"OffHeapUnifiedMemory" : 321, | "DirectPoolMemory" : 654, - | "MappedPoolMemory" : 765 + | "MappedPoolMemory" : 765, + | "ProcessTreeJVMVMemory": 256912, + | "ProcessTreeJVMRSSMemory": 123456, + | "ProcessTreePythonVMemory": 123456, + | "ProcessTreePythonRSSMemory": 61728, + | "ProcessTreeOtherVMemory": 30364, + | "ProcessTreeOtherRSSMemory": 15182 | } | |} @@ -2105,7 +2113,13 @@ private[spark] object JsonProtocolSuite extends Assertions { | "OnHeapUnifiedMemory" : 432, | "OffHeapUnifiedMemory" : 321, | "DirectPoolMemory" : 654, - | "MappedPoolMemory" : 765 + | "MappedPoolMemory" : 765, + | "ProcessTreeJVMVMemory": 256912, + | "ProcessTreeJVMRSSMemory": 123456, + | "ProcessTreePythonVMemory": 123456, + | "ProcessTreePythonRSSMemory": 61728, + | "ProcessTreeOtherVMemory": 30364, + | "ProcessTreeOtherRSSMemory": 15182 | } |} """.stripMargin diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 777950016801..8239cbc3a381 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -82,6 +82,8 @@ app-20161115172038-0000 app-20161116163331-0000 application_1516285256255_0012 application_1506645932520_24630151 +application_1538416563558_0014 +stat local-1422981759269 local-1422981780767 local-1425081759269 From 0a37da68e1cbca0e0120beab916309898022ba85 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 10 Dec 2018 12:04:44 -0800 Subject: [PATCH 0105/1072] [SPARK-26317][BUILD] Upgrade SBT to 0.13.18 ## What changes were proposed in this pull request? SBT 0.13.14 ~ 1.1.1 has a bug on accessing `java.util.Base64.getDecoder` with JDK9+. It's fixed at 1.1.2 and backported to [0.13.18 (released on Nov 28th)](https://github.com/sbt/sbt/releases/tag/v0.13.18). This PR aims to update SBT. ## How was this patch tested? Pass the Jenkins with the building and existing tests. Closes #23270 from dongjoon-hyun/SPARK-26317. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index d03985d980ec..23aa187fb35a 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=0.13.17 +sbt.version=0.13.18 From 82c1ac48a37bcc929db86515bffd602c381415be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=94=B0=E7=94=B000222924?= Date: Mon, 10 Dec 2018 18:27:01 -0600 Subject: [PATCH 0106/1072] =?UTF-8?q?[SPARK-25696]=20The=20storage=20memor?= =?UTF-8?q?y=20displayed=20on=20spark=20Application=20UI=20is=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … incorrect. ## What changes were proposed in this pull request? In the reported heartbeat information, the unit of the memory data is bytes, which is converted by the formatBytes() function in the utils.js file before being displayed in the interface. The cardinality of the unit conversion in the formatBytes function is 1000, which should be 1024. Change the cardinality of the unit conversion in the formatBytes function to 1024. ## How was this patch tested? manual tests Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #22683 from httfighter/SPARK-25696. 
Lead-authored-by: 韩田田00222924 Co-authored-by: han.tiantian@zte.com.cn Signed-off-by: Sean Owen --- R/pkg/R/context.R | 2 +- R/pkg/R/mllib_tree.R | 6 +-- .../org/apache/spark/ui/static/utils.js | 4 +- .../scala/org/apache/spark/SparkContext.scala | 4 +- .../spark/serializer/KryoSerializer.scala | 4 +- .../scala/org/apache/spark/util/Utils.scala | 46 +++++++++---------- .../apache/spark/MapOutputTrackerSuite.scala | 2 +- .../serializer/KryoSerializerSuite.scala | 2 +- .../apache/spark/storage/DiskStoreSuite.scala | 2 +- .../org/apache/spark/util/UtilsSuite.scala | 18 ++++---- docs/configuration.md | 6 +-- docs/hardware-provisioning.md | 4 +- docs/mllib-decision-tree.md | 2 +- docs/running-on-mesos.md | 2 +- docs/spark-standalone.md | 4 +- docs/streaming-kinesis-integration.md | 2 +- docs/tuning.md | 10 ++-- .../linalg/distributed/BlockMatrix.scala | 2 +- .../optimization/GradientDescentSuite.scala | 2 +- python/pyspark/rdd.py | 2 +- python/pyspark/shuffle.py | 4 +- .../spark/deploy/yarn/YarnAllocator.scala | 2 +- .../expressions/NullExpressionsSuite.scala | 4 +- .../catalyst/expressions/OrderingSuite.scala | 2 +- .../exchange/ExchangeCoordinator.scala | 14 +++--- .../execution/python/WindowInPandasExec.scala | 2 +- .../sql/execution/window/WindowExec.scala | 2 +- .../org/apache/spark/sql/DataFrameSuite.scala | 2 +- .../spark/sql/StatisticsCollectionSuite.scala | 12 ++--- 29 files changed, 85 insertions(+), 85 deletions(-) diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index e99136723f65..0207f249f9aa 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -87,7 +87,7 @@ objectFile <- function(sc, path, minPartitions = NULL) { #' in the list are split into \code{numSlices} slices and distributed to nodes #' in the cluster. #' -#' If size of serialized slices is larger than spark.r.maxAllocationLimit or (200MB), the function +#' If size of serialized slices is larger than spark.r.maxAllocationLimit or (200MiB), the function #' will write it to disk and send the file name to JVM. Also to make sure each slice is not #' larger than that limit, number of slices may be increased. #' diff --git a/R/pkg/R/mllib_tree.R b/R/pkg/R/mllib_tree.R index 0e60842dd44c..9844061cfd07 100644 --- a/R/pkg/R/mllib_tree.R +++ b/R/pkg/R/mllib_tree.R @@ -157,7 +157,7 @@ print.summary.decisionTree <- function(x) { #' @param checkpointInterval Param for set checkpoint interval (>= 1) or disable checkpoint (-1). #' Note: this setting will be ignored if the checkpoint directory is not #' set. -#' @param maxMemoryInMB Maximum memory in MB allocated to histogram aggregation. +#' @param maxMemoryInMB Maximum memory in MiB allocated to histogram aggregation. #' @param cacheNodeIds If FALSE, the algorithm will pass trees to executors to match instances with #' nodes. If TRUE, the algorithm will cache node IDs for each instance. Caching #' can speed up training of deeper trees. Users can set how often should the @@ -382,7 +382,7 @@ setMethod("write.ml", signature(object = "GBTClassificationModel", path = "chara #' @param checkpointInterval Param for set checkpoint interval (>= 1) or disable checkpoint (-1). #' Note: this setting will be ignored if the checkpoint directory is not #' set. -#' @param maxMemoryInMB Maximum memory in MB allocated to histogram aggregation. +#' @param maxMemoryInMB Maximum memory in MiB allocated to histogram aggregation. #' @param cacheNodeIds If FALSE, the algorithm will pass trees to executors to match instances with #' nodes. If TRUE, the algorithm will cache node IDs for each instance. 
Caching #' can speed up training of deeper trees. Users can set how often should the @@ -588,7 +588,7 @@ setMethod("write.ml", signature(object = "RandomForestClassificationModel", path #' @param checkpointInterval Param for set checkpoint interval (>= 1) or disable checkpoint (-1). #' Note: this setting will be ignored if the checkpoint directory is not #' set. -#' @param maxMemoryInMB Maximum memory in MB allocated to histogram aggregation. +#' @param maxMemoryInMB Maximum memory in MiB allocated to histogram aggregation. #' @param cacheNodeIds If FALSE, the algorithm will pass trees to executors to match instances with #' nodes. If TRUE, the algorithm will cache node IDs for each instance. Caching #' can speed up training of deeper trees. Users can set how often should the diff --git a/core/src/main/resources/org/apache/spark/ui/static/utils.js b/core/src/main/resources/org/apache/spark/ui/static/utils.js index deeafad4eb5f..22985e31a780 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/utils.js +++ b/core/src/main/resources/org/apache/spark/ui/static/utils.js @@ -40,9 +40,9 @@ function formatDuration(milliseconds) { function formatBytes(bytes, type) { if (type !== 'display') return bytes; if (bytes == 0) return '0.0 B'; - var k = 1000; + var k = 1024; var dm = 1; - var sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']; + var sizes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']; var i = Math.floor(Math.log(bytes) / Math.log(k)); return parseFloat((bytes / Math.pow(k, i)).toFixed(dm)) + ' ' + sizes[i]; } diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 845a3d5f6d6f..696dafda6d1e 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1043,7 +1043,7 @@ class SparkContext(config: SparkConf) extends Logging { // See SPARK-11227 for details. FileSystem.getLocal(hadoopConfiguration) - // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it. + // A Hadoop configuration can be about 10 KiB, which is pretty big, so broadcast it. 
val confBroadcast = broadcast(new SerializableConfiguration(hadoopConfiguration)) val setInputPathsFunc = (jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, path) new HadoopRDD( @@ -2723,7 +2723,7 @@ object SparkContext extends Logging { val memoryPerSlaveInt = memoryPerSlave.toInt if (sc.executorMemory > memoryPerSlaveInt) { throw new SparkException( - "Asked to launch cluster with %d MB RAM / worker but requested %d MB/worker".format( + "Asked to launch cluster with %d MiB RAM / worker but requested %d MiB/worker".format( memoryPerSlaveInt, sc.executorMemory)) } diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 1e1c27c47787..72ca0fbe667e 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -62,14 +62,14 @@ class KryoSerializer(conf: SparkConf) if (bufferSizeKb >= ByteUnit.GiB.toKiB(2)) { throw new IllegalArgumentException("spark.kryoserializer.buffer must be less than " + - s"2048 mb, got: + ${ByteUnit.KiB.toMiB(bufferSizeKb)} mb.") + s"2048 MiB, got: + ${ByteUnit.KiB.toMiB(bufferSizeKb)} MiB.") } private val bufferSize = ByteUnit.KiB.toBytes(bufferSizeKb).toInt val maxBufferSizeMb = conf.getSizeAsMb("spark.kryoserializer.buffer.max", "64m").toInt if (maxBufferSizeMb >= ByteUnit.GiB.toMiB(2)) { throw new IllegalArgumentException("spark.kryoserializer.buffer.max must be less than " + - s"2048 mb, got: + $maxBufferSizeMb mb.") + s"2048 MiB, got: + $maxBufferSizeMb MiB.") } private val maxBufferSize = ByteUnit.MiB.toBytes(maxBufferSizeMb).toInt diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 227c9e734f0a..b4ea1ee95021 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1092,41 +1092,41 @@ private[spark] object Utils extends Logging { * Convert a Java memory parameter passed to -Xmx (such as 300m or 1g) to a number of mebibytes. */ def memoryStringToMb(str: String): Int = { - // Convert to bytes, rather than directly to MB, because when no units are specified the unit + // Convert to bytes, rather than directly to MiB, because when no units are specified the unit // is assumed to be bytes (JavaUtils.byteStringAsBytes(str) / 1024 / 1024).toInt } /** - * Convert a quantity in bytes to a human-readable string such as "4.0 MB". + * Convert a quantity in bytes to a human-readable string such as "4.0 MiB". */ def bytesToString(size: Long): String = bytesToString(BigInt(size)) def bytesToString(size: BigInt): String = { - val EB = 1L << 60 - val PB = 1L << 50 - val TB = 1L << 40 - val GB = 1L << 30 - val MB = 1L << 20 - val KB = 1L << 10 - - if (size >= BigInt(1L << 11) * EB) { + val EiB = 1L << 60 + val PiB = 1L << 50 + val TiB = 1L << 40 + val GiB = 1L << 30 + val MiB = 1L << 20 + val KiB = 1L << 10 + + if (size >= BigInt(1L << 11) * EiB) { // The number is too large, show it in scientific notation. 
BigDecimal(size, new MathContext(3, RoundingMode.HALF_UP)).toString() + " B" } else { val (value, unit) = { - if (size >= 2 * EB) { - (BigDecimal(size) / EB, "EB") - } else if (size >= 2 * PB) { - (BigDecimal(size) / PB, "PB") - } else if (size >= 2 * TB) { - (BigDecimal(size) / TB, "TB") - } else if (size >= 2 * GB) { - (BigDecimal(size) / GB, "GB") - } else if (size >= 2 * MB) { - (BigDecimal(size) / MB, "MB") - } else if (size >= 2 * KB) { - (BigDecimal(size) / KB, "KB") + if (size >= 2 * EiB) { + (BigDecimal(size) / EiB, "EiB") + } else if (size >= 2 * PiB) { + (BigDecimal(size) / PiB, "PiB") + } else if (size >= 2 * TiB) { + (BigDecimal(size) / TiB, "TiB") + } else if (size >= 2 * GiB) { + (BigDecimal(size) / GiB, "GiB") + } else if (size >= 2 * MiB) { + (BigDecimal(size) / MiB, "MiB") + } else if (size >= 2 * KiB) { + (BigDecimal(size) / KiB, "KiB") } else { (BigDecimal(size), "B") } @@ -1157,7 +1157,7 @@ private[spark] object Utils extends Logging { } /** - * Convert a quantity in megabytes to a human-readable string such as "4.0 MB". + * Convert a quantity in megabytes to a human-readable string such as "4.0 MiB". */ def megabytesToString(megabytes: Long): String = { bytesToString(megabytes * 1024L * 1024L) diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index 21f481d47724..3e1a3d4f7306 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -244,7 +244,7 @@ class MapOutputTrackerSuite extends SparkFunSuite { val newConf = new SparkConf newConf.set("spark.rpc.message.maxSize", "1") newConf.set("spark.rpc.askTimeout", "1") // Fail fast - newConf.set("spark.shuffle.mapOutput.minSizeForBroadcast", "10240") // 10 KB << 1MB framesize + newConf.set("spark.shuffle.mapOutput.minSizeForBroadcast", "10240") // 10 KiB << 1MiB framesize // needs TorrentBroadcast so need a SparkContext withSpark(new SparkContext("local", "MapOutputTrackerSuite", newConf)) { sc => diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala index 467e49026a02..8af53274d9b2 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala @@ -75,7 +75,7 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { val thrown3 = intercept[IllegalArgumentException](newKryoInstance(conf, "2g", "3g")) assert(thrown3.getMessage.contains(kryoBufferProperty)) assert(!thrown3.getMessage.contains(kryoBufferMaxProperty)) - // test configuration with mb is supported properly + // test configuration with MiB is supported properly newKryoInstance(conf, "8m", "9m") } diff --git a/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala index 959cf58fa053..6f60b08088cd 100644 --- a/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala @@ -128,7 +128,7 @@ class DiskStoreSuite extends SparkFunSuite { assert(e.getMessage === s"requirement failed: can't create a byte buffer of size ${blockData.size}" + - " since it exceeds 10.0 KB.") + " since it exceeds 10.0 KiB.") } test("block data encryption") { diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala 
b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 901a724da8a1..b2ff1cce3eb0 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -133,7 +133,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { assert(Utils.byteStringAsBytes("1p") === ByteUnit.PiB.toBytes(1)) // Overflow handling, 1073741824p exceeds Long.MAX_VALUE if converted straight to Bytes - // This demonstrates that we can have e.g 1024^3 PB without overflowing. + // This demonstrates that we can have e.g 1024^3 PiB without overflowing. assert(Utils.byteStringAsGb("1073741824p") === ByteUnit.PiB.toGiB(1073741824)) assert(Utils.byteStringAsMb("1073741824p") === ByteUnit.PiB.toMiB(1073741824)) @@ -149,7 +149,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { // Test overflow exception intercept[IllegalArgumentException] { - // This value exceeds Long.MAX when converted to TB + // This value exceeds Long.MAX when converted to TiB ByteUnit.PiB.toTiB(9223372036854775807L) } @@ -189,13 +189,13 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { test("bytesToString") { assert(Utils.bytesToString(10) === "10.0 B") assert(Utils.bytesToString(1500) === "1500.0 B") - assert(Utils.bytesToString(2000000) === "1953.1 KB") - assert(Utils.bytesToString(2097152) === "2.0 MB") - assert(Utils.bytesToString(2306867) === "2.2 MB") - assert(Utils.bytesToString(5368709120L) === "5.0 GB") - assert(Utils.bytesToString(5L * (1L << 40)) === "5.0 TB") - assert(Utils.bytesToString(5L * (1L << 50)) === "5.0 PB") - assert(Utils.bytesToString(5L * (1L << 60)) === "5.0 EB") + assert(Utils.bytesToString(2000000) === "1953.1 KiB") + assert(Utils.bytesToString(2097152) === "2.0 MiB") + assert(Utils.bytesToString(2306867) === "2.2 MiB") + assert(Utils.bytesToString(5368709120L) === "5.0 GiB") + assert(Utils.bytesToString(5L * (1L << 40)) === "5.0 TiB") + assert(Utils.bytesToString(5L * (1L << 50)) === "5.0 PiB") + assert(Utils.bytesToString(5L * (1L << 60)) === "5.0 EiB") assert(Utils.bytesToString(BigInt(1L << 11) * (1L << 60)) === "2.36E+21 B") } diff --git a/docs/configuration.md b/docs/configuration.md index 9abbb3f63490..ff9b802617f0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1384,14 +1384,14 @@ Apart from these, the following properties are also available, and may be useful

- + - + diff --git a/docs/hardware-provisioning.md b/docs/hardware-provisioning.md index 896f9302ef30..29876a51b280 100644 --- a/docs/hardware-provisioning.md +++ b/docs/hardware-provisioning.md @@ -37,7 +37,7 @@ use the same disks as HDFS. # Memory -In general, Spark can run well with anywhere from **8 GB to hundreds of gigabytes** of memory per +In general, Spark can run well with anywhere from **8 GiB to hundreds of gigabytes** of memory per machine. In all cases, we recommend allocating only at most 75% of the memory for Spark; leave the rest for the operating system and buffer cache. @@ -47,7 +47,7 @@ Storage tab of Spark's monitoring UI (`http://:4040`) to see its si Note that memory usage is greatly affected by storage level and serialization format -- see the [tuning guide](tuning.html) for tips on how to reduce it. -Finally, note that the Java VM does not always behave well with more than 200 GB of RAM. If you +Finally, note that the Java VM does not always behave well with more than 200 GiB of RAM. If you purchase machines with more RAM than this, you can run _multiple worker JVMs per node_. In Spark's [standalone mode](spark-standalone.html), you can set the number of workers per node with the `SPARK_WORKER_INSTANCES` variable in `conf/spark-env.sh`, and the number of cores diff --git a/docs/mllib-decision-tree.md b/docs/mllib-decision-tree.md index ec13b81f8555..281755f4cea8 100644 --- a/docs/mllib-decision-tree.md +++ b/docs/mllib-decision-tree.md @@ -149,7 +149,7 @@ These parameters may be tuned. Be careful to validate on held-out test data whe * Note that the `maxBins` parameter must be at least the maximum number of categories `$M$` for any categorical feature. * **`maxMemoryInMB`**: Amount of memory to be used for collecting sufficient statistics. - * The default value is conservatively chosen to be 256 MB to allow the decision algorithm to work in most scenarios. Increasing `maxMemoryInMB` can lead to faster training (if the memory is available) by allowing fewer passes over the data. However, there may be decreasing returns as `maxMemoryInMB` grows since the amount of communication on each iteration can be proportional to `maxMemoryInMB`. + * The default value is conservatively chosen to be 256 MiB to allow the decision algorithm to work in most scenarios. Increasing `maxMemoryInMB` can lead to faster training (if the memory is available) by allowing fewer passes over the data. However, there may be decreasing returns as `maxMemoryInMB` grows since the amount of communication on each iteration can be proportional to `maxMemoryInMB`. * *Implementation details*: For faster processing, the decision tree algorithm collects statistics about groups of nodes to split (rather than 1 node at a time). The number of nodes which can be handled in one group is determined by the memory requirements (which vary per features). The `maxMemoryInMB` parameter specifies the memory limit in terms of megabytes which each worker can use for these statistics. * **`subsamplingRate`**: Fraction of the training data used for learning the decision tree. This parameter is most relevant for training ensembles of trees (using [`RandomForest`](api/scala/index.html#org.apache.spark.mllib.tree.RandomForest$) and [`GradientBoostedTrees`](api/scala/index.html#org.apache.spark.mllib.tree.GradientBoostedTrees)), where it can be useful to subsample the original data. For training a single decision tree, this parameter is less useful since the number of training instances is generally not the main constraint. 
diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index b3ba4b255b71..968d668e2c93 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -437,7 +437,7 @@ See the [configuration page](configuration.html) for information on Spark config diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 49ef2e1ce2a1..672a4d0f3199 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -60,7 +60,7 @@ Finally, the following configuration options can be passed to the master and wor - + @@ -128,7 +128,7 @@ You can optionally configure the cluster further by setting environment variable - + diff --git a/docs/streaming-kinesis-integration.md b/docs/streaming-kinesis-integration.md index 6a52e8a7b0eb..4a1812bbb40a 100644 --- a/docs/streaming-kinesis-integration.md +++ b/docs/streaming-kinesis-integration.md @@ -248,5 +248,5 @@ de-aggregate records during consumption. - `InitialPositionInStream.TRIM_HORIZON` may lead to duplicate processing of records where the impact is dependent on checkpoint frequency and processing idempotency. #### Kinesis retry configuration - - `spark.streaming.kinesis.retry.waitTime` : Wait time between Kinesis retries as a duration string. When reading from Amazon Kinesis, users may hit `ProvisionedThroughputExceededException`'s, when consuming faster than 5 transactions/second or, exceeding the maximum read rate of 2 MB/second. This configuration can be tweaked to increase the sleep between fetches when a fetch fails to reduce these exceptions. Default is "100ms". + - `spark.streaming.kinesis.retry.waitTime` : Wait time between Kinesis retries as a duration string. When reading from Amazon Kinesis, users may hit `ProvisionedThroughputExceededException`'s, when consuming faster than 5 transactions/second or, exceeding the maximum read rate of 2 MiB/second. This configuration can be tweaked to increase the sleep between fetches when a fetch fails to reduce these exceptions. Default is "100ms". - `spark.streaming.kinesis.retry.maxAttempts` : Max number of retries for Kinesis fetches. This config can also be used to tackle the Kinesis `ProvisionedThroughputExceededException`'s in scenarios mentioned above. It can be increased to have more number of retries for Kinesis reads. Default is 3. diff --git a/docs/tuning.md b/docs/tuning.md index cd0f9cd08136..43acacb98cbf 100644 --- a/docs/tuning.md +++ b/docs/tuning.md @@ -115,7 +115,7 @@ variety of workloads without requiring user expertise of how memory is divided i Although there are two relevant configurations, the typical user should not need to adjust them as the default values are applicable to most workloads: -* `spark.memory.fraction` expresses the size of `M` as a fraction of the (JVM heap space - 300MB) +* `spark.memory.fraction` expresses the size of `M` as a fraction of the (JVM heap space - 300MiB) (default 0.6). The rest of the space (40%) is reserved for user data structures, internal metadata in Spark, and safeguarding against OOM errors in the case of sparse and unusually large records. @@ -147,7 +147,7 @@ pointer-based data structures and wrapper objects. There are several ways to do Java standard library. 2. Avoid nested structures with a lot of small objects and pointers when possible. 3. Consider using numeric IDs or enumeration objects instead of strings for keys. -4. If you have less than 32 GB of RAM, set the JVM flag `-XX:+UseCompressedOops` to make pointers be +4. 
If you have less than 32 GiB of RAM, set the JVM flag `-XX:+UseCompressedOops` to make pointers be four bytes instead of eight. You can add these options in [`spark-env.sh`](configuration.html#environment-variables). @@ -224,8 +224,8 @@ temporary objects created during task execution. Some steps which may be useful * As an example, if your task is reading data from HDFS, the amount of memory used by the task can be estimated using the size of the data block read from HDFS. Note that the size of a decompressed block is often 2 or 3 times the - size of the block. So if we wish to have 3 or 4 tasks' worth of working space, and the HDFS block size is 128 MB, - we can estimate size of Eden to be `4*3*128MB`. + size of the block. So if we wish to have 3 or 4 tasks' worth of working space, and the HDFS block size is 128 MiB, + we can estimate size of Eden to be `4*3*128MiB`. * Monitor how the frequency and time taken by garbage collection changes with the new settings. @@ -267,7 +267,7 @@ available in `SparkContext` can greatly reduce the size of each serialized task, of launching a job over a cluster. If your tasks use any large object from the driver program inside of them (e.g. a static lookup table), consider turning it into a broadcast variable. Spark prints the serialized size of each task on the master, so you can look at that to -decide whether your tasks are too large; in general tasks larger than about 20 KB are probably +decide whether your tasks are too large; in general tasks larger than about 20 KiB are probably worth optimizing. ## Data Locality diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index e58860fea97d..e32d615af2a4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -322,7 +322,7 @@ class BlockMatrix @Since("1.3.0") ( val m = numRows().toInt val n = numCols().toInt val mem = m * n / 125000 - if (mem > 500) logWarning(s"Storing this matrix will require $mem MB of memory!") + if (mem > 500) logWarning(s"Storing this matrix will require $mem MiB of memory!") val localBlocks = blocks.collect() val values = new Array[Double](m * n) localBlocks.foreach { case ((blockRowIndex, blockColIndex), submat) => diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala index 37eb794b0c5c..6250b0363ee3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala @@ -190,7 +190,7 @@ class GradientDescentClusterSuite extends SparkFunSuite with LocalClusterSparkCo iter.map(i => (1.0, Vectors.dense(Array.fill(n)(random.nextDouble())))) }.cache() // If we serialize data directly in the task closure, the size of the serialized task would be - // greater than 1MB and hence Spark would throw an error. + // greater than 1MiB and hence Spark would throw an error. 
val (weights, loss) = GradientDescent.runMiniBatchSGD( points, new LogisticGradient, diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 8bd6897df925..b6e17cab44e9 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -127,7 +127,7 @@ def __new__(cls, mean, confidence, low, high): def _parse_memory(s): """ Parse a memory string in the format supported by Java (e.g. 1g, 200m) and - return the value in MB + return the value in MiB >>> _parse_memory("256m") 256 diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py index bd0ac0039ffe..5d2d63850e9b 100644 --- a/python/pyspark/shuffle.py +++ b/python/pyspark/shuffle.py @@ -37,7 +37,7 @@ process = None def get_used_memory(): - """ Return the used memory in MB """ + """ Return the used memory in MiB """ global process if process is None or process._pid != os.getpid(): process = psutil.Process(os.getpid()) @@ -50,7 +50,7 @@ def get_used_memory(): except ImportError: def get_used_memory(): - """ Return the used memory in MB """ + """ Return the used memory in MiB """ if platform.system() == 'Linux': for line in open('/proc/self/status'): if line.startswith('VmRSS:'): diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 9497530805c1..d37d0d66d8ae 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -127,7 +127,7 @@ private[yarn] class YarnAllocator( private var numUnexpectedContainerRelease = 0L private val containerIdToExecutorId = new HashMap[ContainerId, String] - // Executor memory in MB. + // Executor memory in MiB. protected val executorMemory = sparkConf.get(EXECUTOR_MEMORY).toInt // Additional memory overhead. 
protected val memoryOverhead: Int = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD).getOrElse( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala index 8818d0135b29..b7ce36723081 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala @@ -160,7 +160,7 @@ class NullExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(AtLeastNNonNulls(4, nullOnly), false, EmptyRow) } - test("Coalesce should not throw 64kb exception") { + test("Coalesce should not throw 64KiB exception") { val inputs = (1 to 2500).map(x => Literal(s"x_$x")) checkEvaluation(Coalesce(inputs), "x_1") } @@ -171,7 +171,7 @@ class NullExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { assert(ctx.inlinedMutableStates.size == 1) } - test("AtLeastNNonNulls should not throw 64kb exception") { + test("AtLeastNNonNulls should not throw 64KiB exception") { val inputs = (1 to 4000).map(x => Literal(s"x_$x")) checkEvaluation(AtLeastNNonNulls(1, inputs), true) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index d0604b8eb767..94e251d90bcf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -128,7 +128,7 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { } } - test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") { + test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KiB") { val sortOrder = Literal("abc").asc // this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala index f5d93ee5fa91..e4ec76f0b9a1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala @@ -73,14 +73,14 @@ import org.apache.spark.sql.execution.{ShuffledRowRDD, SparkPlan} * greater than the target size. 
* * For example, we have two stages with the following pre-shuffle partition size statistics: - * stage 1: [100 MB, 20 MB, 100 MB, 10MB, 30 MB] - * stage 2: [10 MB, 10 MB, 70 MB, 5 MB, 5 MB] - * assuming the target input size is 128 MB, we will have four post-shuffle partitions, + * stage 1: [100 MiB, 20 MiB, 100 MiB, 10MiB, 30 MiB] + * stage 2: [10 MiB, 10 MiB, 70 MiB, 5 MiB, 5 MiB] + * assuming the target input size is 128 MiB, we will have four post-shuffle partitions, * which are: - * - post-shuffle partition 0: pre-shuffle partition 0 (size 110 MB) - * - post-shuffle partition 1: pre-shuffle partition 1 (size 30 MB) - * - post-shuffle partition 2: pre-shuffle partition 2 (size 170 MB) - * - post-shuffle partition 3: pre-shuffle partition 3 and 4 (size 50 MB) + * - post-shuffle partition 0: pre-shuffle partition 0 (size 110 MiB) + * - post-shuffle partition 1: pre-shuffle partition 1 (size 30 MiB) + * - post-shuffle partition 2: pre-shuffle partition 2 (size 170 MiB) + * - post-shuffle partition 3: pre-shuffle partition 3 and 4 (size 50 MiB) */ class ExchangeCoordinator( advisoryTargetPostShuffleInputSize: Long, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala index 27bed1137e5b..82973307feef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala @@ -44,7 +44,7 @@ case class WindowInPandasExec( override def requiredChildDistribution: Seq[Distribution] = { if (partitionSpec.isEmpty) { - // Only show warning when the number of bytes is larger than 100 MB? + // Only show warning when the number of bytes is larger than 100 MiB? logWarning("No Partition Defined for Window operation! Moving all data to a single " + "partition, this can cause serious performance degradation.") AllTuples :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index fede0f3e92d6..729b8bdb3dae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -90,7 +90,7 @@ case class WindowExec( override def requiredChildDistribution: Seq[Distribution] = { if (partitionSpec.isEmpty) { - // Only show warning when the number of bytes is larger than 100 MB? + // Only show warning when the number of bytes is larger than 100 MiB? logWarning("No Partition Defined for Window operation! 
Moving all data to a single " + "partition, this can cause serious performance degradation.") AllTuples :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index fc3faa08d55f..b51c51e66350 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1904,7 +1904,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { val e = intercept[SparkException] { df.filter(filter).count() }.getMessage - assert(e.contains("grows beyond 64 KB")) + assert(e.contains("grows beyond 64 KiB")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index cb562d65b614..02dc32d5f90b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -227,12 +227,12 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared BigInt(0) -> (("0.0 B", "0")), BigInt(100) -> (("100.0 B", "100")), BigInt(2047) -> (("2047.0 B", "2.05E+3")), - BigInt(2048) -> (("2.0 KB", "2.05E+3")), - BigInt(3333333) -> (("3.2 MB", "3.33E+6")), - BigInt(4444444444L) -> (("4.1 GB", "4.44E+9")), - BigInt(5555555555555L) -> (("5.1 TB", "5.56E+12")), - BigInt(6666666666666666L) -> (("5.9 PB", "6.67E+15")), - BigInt(1L << 10 ) * (1L << 60) -> (("1024.0 EB", "1.18E+21")), + BigInt(2048) -> (("2.0 KiB", "2.05E+3")), + BigInt(3333333) -> (("3.2 MiB", "3.33E+6")), + BigInt(4444444444L) -> (("4.1 GiB", "4.44E+9")), + BigInt(5555555555555L) -> (("5.1 TiB", "5.56E+12")), + BigInt(6666666666666666L) -> (("5.9 PiB", "6.67E+15")), + BigInt(1L << 10 ) * (1L << 60) -> (("1024.0 EiB", "1.18E+21")), BigInt(1L << 11) * (1L << 60) -> (("2.36E+21 B", "2.36E+21")) ) numbers.foreach { case (input, (expectedSize, expectedRows)) => From 05cf81e6de3d61ddb0af81cd179665693f23351f Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Mon, 10 Dec 2018 18:28:13 -0600 Subject: [PATCH 0107/1072] [SPARK-19827][R] spark.ml R API for PIC ## What changes were proposed in this pull request? Add PowerIterationCluster (PIC) in R ## How was this patch tested? Add test case Closes #23072 from huaxingao/spark-19827. 
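The new R wrapper ultimately delegates to the existing Scala estimator. A rough usage sketch of that underlying API follows (illustrative only; the SparkSession setup and the edge list mirror the R test case, everything else is an assumption):

```scala
// Sketch of the Scala PowerIterationClustering call that spark.assignClusters delegates to.
import org.apache.spark.ml.clustering.PowerIterationClustering
import org.apache.spark.sql.SparkSession

object PicSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("pic-sketch").master("local[*]").getOrCreate()
    import spark.implicits._

    // Edge list: (source vertex, destination vertex, similarity weight).
    val edges = Seq((0L, 1L, 1.0), (0L, 2L, 1.0), (1L, 2L, 1.0), (3L, 4L, 1.0), (4L, 0L, 0.1))
      .toDF("src", "dst", "weight")

    val assignments = new PowerIterationClustering()
      .setK(2)
      .setInitMode("degree")
      .setMaxIter(20)
      .setWeightCol("weight")
      .assignClusters(edges)  // DataFrame with columns "id" and "cluster"

    assignments.orderBy("id").show()
    spark.stop()
  }
}
```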
Authored-by: Huaxin Gao Signed-off-by: Sean Owen --- R/pkg/NAMESPACE | 3 +- R/pkg/R/generics.R | 4 ++ R/pkg/R/mllib_clustering.R | 62 +++++++++++++++++++ R/pkg/tests/fulltests/test_mllib_clustering.R | 13 ++++ R/pkg/vignettes/sparkr-vignettes.Rmd | 14 +++++ docs/ml-clustering.md | 41 ++++++++++++ docs/sparkr.md | 1 + .../src/main/r/ml/powerIterationClustering.R | 38 ++++++++++++ .../clustering/PowerIterationClustering.scala | 4 +- .../r/PowerIterationClusteringWrapper.scala | 39 ++++++++++++ python/pyspark/ml/clustering.py | 4 +- 11 files changed, 218 insertions(+), 5 deletions(-) create mode 100644 examples/src/main/r/ml/powerIterationClustering.R create mode 100644 mllib/src/main/scala/org/apache/spark/ml/r/PowerIterationClusteringWrapper.scala diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 1f8ba0bcf1cf..cfad20db16c7 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -67,7 +67,8 @@ exportMethods("glm", "spark.fpGrowth", "spark.freqItemsets", "spark.associationRules", - "spark.findFrequentSequentialPatterns") + "spark.findFrequentSequentialPatterns", + "spark.assignClusters") # Job group lifecycle management methods export("setJobGroup", diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index eed76465221c..09d817127edd 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1479,6 +1479,10 @@ setGeneric("spark.associationRules", function(object) { standardGeneric("spark.a setGeneric("spark.findFrequentSequentialPatterns", function(data, ...) { standardGeneric("spark.findFrequentSequentialPatterns") }) +#' @rdname spark.powerIterationClustering +setGeneric("spark.assignClusters", + function(data, ...) { standardGeneric("spark.assignClusters") }) + #' @param object a fitted ML model object. #' @param path the directory where the model is saved. #' @param ... additional argument(s) passed to the method. diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R index 900be685824d..7d9dcebfe70d 100644 --- a/R/pkg/R/mllib_clustering.R +++ b/R/pkg/R/mllib_clustering.R @@ -41,6 +41,12 @@ setClass("KMeansModel", representation(jobj = "jobj")) #' @note LDAModel since 2.1.0 setClass("LDAModel", representation(jobj = "jobj")) +#' S4 class that represents a PowerIterationClustering +#' +#' @param jobj a Java object reference to the backing Scala PowerIterationClustering +#' @note PowerIterationClustering since 3.0.0 +setClass("PowerIterationClustering", slots = list(jobj = "jobj")) + #' Bisecting K-Means Clustering Model #' #' Fits a bisecting k-means clustering model against a SparkDataFrame. @@ -610,3 +616,59 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"), function(object, path, overwrite = FALSE) { write_internal(object, path, overwrite) }) + +#' PowerIterationClustering +#' +#' A scalable graph clustering algorithm. Users can call \code{spark.assignClusters} to +#' return a cluster assignment for each input vertex. +#' +# Run the PIC algorithm and returns a cluster assignment for each input vertex. +#' @param data a SparkDataFrame. +#' @param k the number of clusters to create. +#' @param initMode the initialization algorithm. +#' @param maxIter the maximum number of iterations. +#' @param sourceCol the name of the input column for source vertex IDs. +#' @param destinationCol the name of the input column for destination vertex IDs +#' @param weightCol weight column name. If this is not set or \code{NULL}, +#' we treat all instance weights as 1.0. +#' @param ... additional argument(s) passed to the method. 
+#' @return A dataset that contains columns of vertex id and the corresponding cluster for the id. +#' The schema of it will be: +#' \code{id: Long} +#' \code{cluster: Int} +#' @rdname spark.powerIterationClustering +#' @aliases assignClusters,PowerIterationClustering-method,SparkDataFrame-method +#' @examples +#' \dontrun{ +#' df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0), +#' list(1L, 2L, 1.0), list(3L, 4L, 1.0), +#' list(4L, 0L, 0.1)), +#' schema = c("src", "dst", "weight")) +#' clusters <- spark.assignClusters(df, initMode="degree", weightCol="weight") +#' showDF(clusters) +#' } +#' @note spark.assignClusters(SparkDataFrame) since 3.0.0 +setMethod("spark.assignClusters", + signature(data = "SparkDataFrame"), + function(data, k = 2L, initMode = c("random", "degree"), maxIter = 20L, + sourceCol = "src", destinationCol = "dst", weightCol = NULL) { + if (!is.numeric(k) || k < 1) { + stop("k should be a number with value >= 1.") + } + if (!is.integer(maxIter) || maxIter <= 0) { + stop("maxIter should be a number with value > 0.") + } + initMode <- match.arg(initMode) + if (!is.null(weightCol) && weightCol == "") { + weightCol <- NULL + } else if (!is.null(weightCol)) { + weightCol <- as.character(weightCol) + } + jobj <- callJStatic("org.apache.spark.ml.r.PowerIterationClusteringWrapper", + "getPowerIterationClustering", + as.integer(k), initMode, + as.integer(maxIter), as.character(sourceCol), + as.character(destinationCol), weightCol) + object <- new("PowerIterationClustering", jobj = jobj) + dataFrame(callJMethod(object@jobj, "assignClusters", data@sdf)) + }) diff --git a/R/pkg/tests/fulltests/test_mllib_clustering.R b/R/pkg/tests/fulltests/test_mllib_clustering.R index 4110e13da494..b78a476f1d05 100644 --- a/R/pkg/tests/fulltests/test_mllib_clustering.R +++ b/R/pkg/tests/fulltests/test_mllib_clustering.R @@ -319,4 +319,17 @@ test_that("spark.posterior and spark.perplexity", { expect_equal(length(local.posterior), sum(unlist(local.posterior))) }) +test_that("spark.assignClusters", { + df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0), + list(1L, 2L, 1.0), list(3L, 4L, 1.0), + list(4L, 0L, 0.1)), + schema = c("src", "dst", "weight")) + clusters <- spark.assignClusters(df, initMode = "degree", weightCol = "weight") + expected_result <- createDataFrame(list(list(4L, 1L), list(0L, 0L), + list(1L, 0L), list(3L, 1L), + list(2L, 0L)), + schema = c("id", "cluster")) + expect_equivalent(expected_result, clusters) +}) + sparkR.session.stop() diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 1c6a03c4b9bc..cbe8c61725c8 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -549,6 +549,8 @@ SparkR supports the following machine learning models and algorithms. * Latent Dirichlet Allocation (LDA) +* Power Iteration Clustering (PIC) + #### Collaborative Filtering * Alternating Least Squares (ALS) @@ -982,6 +984,18 @@ predicted <- predict(model, df) head(predicted) ``` +#### Power Iteration Clustering + +Power Iteration Clustering (PIC) is a scalable graph clustering algorithm. `spark.assignClusters` method runs the PIC algorithm and returns a cluster assignment for each input vertex. 
+ +```{r} +df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0), + list(1L, 2L, 1.0), list(3L, 4L, 1.0), + list(4L, 0L, 0.1)), + schema = c("src", "dst", "weight")) +head(spark.assignClusters(df, initMode = "degree", weightCol = "weight")) +``` + #### FP-growth `spark.fpGrowth` executes FP-growth algorithm to mine frequent itemsets on a `SparkDataFrame`. `itemsCol` should be an array of values. diff --git a/docs/ml-clustering.md b/docs/ml-clustering.md index 1186fb73d0fa..65f265256200 100644 --- a/docs/ml-clustering.md +++ b/docs/ml-clustering.md @@ -265,3 +265,44 @@ Refer to the [R API docs](api/R/spark.gaussianMixture.html) for more details. + +## Power Iteration Clustering (PIC) + +Power Iteration Clustering (PIC) is a scalable graph clustering algorithm +developed by [Lin and Cohen](http://www.cs.cmu.edu/~frank/papers/icml2010-pic-final.pdf). +From the abstract: PIC finds a very low-dimensional embedding of a dataset +using truncated power iteration on a normalized pair-wise similarity matrix of the data. + +`spark.ml`'s PowerIterationClustering implementation takes the following parameters: + +* `k`: the number of clusters to create +* `initMode`: param for the initialization algorithm +* `maxIter`: param for maximum number of iterations +* `srcCol`: param for the name of the input column for source vertex IDs +* `dstCol`: name of the input column for destination vertex IDs +* `weightCol`: Param for weight column name + +**Examples** + +
+<div class="codetabs">
+
+<div data-lang="scala" markdown="1">
+Refer to the [Scala API docs](api/scala/index.html#org.apache.spark.ml.clustering.PowerIterationClustering) for more details.
+
+{% include_example scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala %}
+</div>
+
+<div data-lang="java" markdown="1">
+Refer to the [Java API docs](api/java/org/apache/spark/ml/clustering/PowerIterationClustering.html) for more details.
+
+{% include_example java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java %}
+</div>
+
+<div data-lang="r" markdown="1">
+
+Refer to the [R API docs](api/R/spark.powerIterationClustering.html) for more details.
+
+{% include_example r/ml/powerIterationClustering.R %}
+</div>
+
+</div>
diff --git a/docs/sparkr.md b/docs/sparkr.md index 0057f05de0ff..dbb61241007f 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -544,6 +544,7 @@ SparkR supports the following machine learning algorithms currently: * [`spark.gaussianMixture`](api/R/spark.gaussianMixture.html): [`Gaussian Mixture Model (GMM)`](ml-clustering.html#gaussian-mixture-model-gmm) * [`spark.kmeans`](api/R/spark.kmeans.html): [`K-Means`](ml-clustering.html#k-means) * [`spark.lda`](api/R/spark.lda.html): [`Latent Dirichlet Allocation (LDA)`](ml-clustering.html#latent-dirichlet-allocation-lda) +* [`spark.powerIterationClustering (PIC)`](api/R/spark.powerIterationClustering.html): [`Power Iteration Clustering (PIC)`](ml-clustering.html#power-iteration-clustering-pic) #### Collaborative Filtering diff --git a/examples/src/main/r/ml/powerIterationClustering.R b/examples/src/main/r/ml/powerIterationClustering.R new file mode 100644 index 000000000000..ba43037106d1 --- /dev/null +++ b/examples/src/main/r/ml/powerIterationClustering.R @@ -0,0 +1,38 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# To run this example use +# ./bin/spark-submit examples/src/main/r/ml/powerIterationClustering.R + +# Load SparkR library into your R session +library(SparkR) + +# Initialize SparkSession +sparkR.session(appName = "SparkR-ML-powerIterationCLustering-example") + +# $example on$ +df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0), + list(1L, 2L, 1.0), list(3L, 4L, 1.0), + list(4L, 0L, 0.1)), + schema = c("src", "dst", "weight")) +# assign clusters +clusters <- spark.assignClusters(df, k=2L, maxIter=20L, initMode="degree", weightCol="weight") + +showDF(arrange(clusters, clusters$id)) +# $example off$ + +sparkR.session.stop() diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala index 1b9a3499947d..d9a330f67e8d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala @@ -97,8 +97,8 @@ private[clustering] trait PowerIterationClusteringParams extends Params with Has /** * :: Experimental :: * Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by - * Lin and Cohen. From the abstract: - * PIC finds a very low-dimensional embedding of a dataset using truncated power + * Lin and Cohen. From + * the abstract: PIC finds a very low-dimensional embedding of a dataset using truncated power * iteration on a normalized pair-wise similarity matrix of the data. 
* * This class is not yet an Estimator/Transformer, use `assignClusters` method to run the diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/PowerIterationClusteringWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/PowerIterationClusteringWrapper.scala new file mode 100644 index 000000000000..b5dfad0224ed --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/r/PowerIterationClusteringWrapper.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.r + +import org.apache.spark.ml.clustering.PowerIterationClustering + +private[r] object PowerIterationClusteringWrapper { + def getPowerIterationClustering( + k: Int, + initMode: String, + maxIter: Int, + srcCol: String, + dstCol: String, + weightCol: String): PowerIterationClustering = { + val pic = new PowerIterationClustering() + .setK(k) + .setInitMode(initMode) + .setMaxIter(maxIter) + .setSrcCol(srcCol) + .setDstCol(dstCol) + if (weightCol != null) pic.setWeightCol(weightCol) + pic + } +} diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index d0b507ec5dad..d8a6dfb7d3a7 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -1193,8 +1193,8 @@ class PowerIterationClustering(HasMaxIter, HasWeightCol, JavaParams, JavaMLReada .. note:: Experimental Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by - `Lin and Cohen `_. From the abstract: - PIC finds a very low-dimensional embedding of a dataset using truncated power + `Lin and Cohen `_. From the + abstract: PIC finds a very low-dimensional embedding of a dataset using truncated power iteration on a normalized pair-wise similarity matrix of the data. This class is not yet an Estimator/Transformer, use :py:func:`assignClusters` method From cbe92305cd4f80725a251cf74353e8985d28306c Mon Sep 17 00:00:00 2001 From: 10129659 Date: Tue, 11 Dec 2018 09:50:21 +0800 Subject: [PATCH 0108/1072] [SPARK-26312][SQL] Replace RDDConversions.rowToRowRdd with RowEncoder to improve its conversion performance ## What changes were proposed in this pull request? `RDDConversions` would get disproportionately slower as the number of columns in the query increased, for the type of `converters` before is `scala.collection.immutable.::` which is a subtype of list. This PR removing `RDDConversions` and using `RowEncoder` to convert the Row to InternalRow. The test of `PrunedScanSuite` for 2000 columns and 20k rows takes 409 seconds before this PR, and 361 seconds after. ## How was this patch tested? Test case of `PrunedScanSuite` Closes #23262 from eatoncys/toarray. 
Authored-by: 10129659 Signed-off-by: Hyukjin Kwon --- .../spark/sql/execution/ExistingRDD.scala | 44 +------------------ .../datasources/DataSourceStrategy.scala | 7 ++- 2 files changed, 7 insertions(+), 44 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index e214bfd05041..49fb288fdea6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -18,54 +18,14 @@ package org.apache.spark.sql.execution import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Encoder, Row, SparkSession} -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.{Encoder, SparkSession} +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, UnknownPartitioning} import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.metric.SQLMetrics -import org.apache.spark.sql.types.DataType - -object RDDConversions { - def productToRowRdd[A <: Product](data: RDD[A], outputTypes: Seq[DataType]): RDD[InternalRow] = { - data.mapPartitions { iterator => - val numColumns = outputTypes.length - val mutableRow = new GenericInternalRow(numColumns) - val converters = outputTypes.map(CatalystTypeConverters.createToCatalystConverter) - iterator.map { r => - var i = 0 - while (i < numColumns) { - mutableRow(i) = converters(i)(r.productElement(i)) - i += 1 - } - - mutableRow - } - } - } - - /** - * Convert the objects inside Row into the types Catalyst expected. 
- */ - def rowToRowRdd(data: RDD[Row], outputTypes: Seq[DataType]): RDD[InternalRow] = { - data.mapPartitions { iterator => - val numColumns = outputTypes.length - val mutableRow = new GenericInternalRow(numColumns) - val converters = outputTypes.map(CatalystTypeConverters.createToCatalystConverter) - iterator.map { r => - var i = 0 - while (i < numColumns) { - mutableRow(i) = converters(i)(r(i)) - i += 1 - } - - mutableRow - } - } - } -} object ExternalRDD { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index c6000442fae7..b304e2da6e1c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -29,11 +29,11 @@ import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, Quali import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog._ +import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoTable, LogicalPlan, Project} -import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.command._ @@ -416,7 +416,10 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with output: Seq[Attribute], rdd: RDD[Row]): RDD[InternalRow] = { if (relation.relation.needConversion) { - execution.RDDConversions.rowToRowRdd(rdd, output.map(_.dataType)) + val converters = RowEncoder(StructType.fromAttributes(output)) + rdd.mapPartitions { iterator => + iterator.map(converters.toRow) + } } else { rdd.asInstanceOf[RDD[InternalRow]] } From 7d5f6e8c493b96898ba01edede1522121fe945fc Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 11 Dec 2018 14:16:51 +0800 Subject: [PATCH 0109/1072] [SPARK-26293][SQL] Cast exception when having python udf in subquery ## What changes were proposed in this pull request? This is a regression introduced by https://github.com/apache/spark/pull/22104 at Spark 2.4.0. When we have Python UDF in subquery, we will hit an exception ``` Caused by: java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.AttributeReference cannot be cast to org.apache.spark.sql.catalyst.expressions.PythonUDF at scala.collection.immutable.Stream.map(Stream.scala:414) at org.apache.spark.sql.execution.python.EvalPythonExec.$anonfun$doExecute$2(EvalPythonExec.scala:98) at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:815) ... ``` https://github.com/apache/spark/pull/22104 turned `ExtractPythonUDFs` from a physical rule to optimizer rule. However, there is a difference between a physical rule and optimizer rule. A physical rule always runs once, an optimizer rule may be applied twice on a query tree even the rule is located in a batch that only runs once. For a subquery, the `OptimizeSubqueries` rule will execute the entire optimizer on the query plan inside subquery. 
Later on subquery will be turned to joins, and the optimizer rules will be applied to it again. Unfortunately, the `ExtractPythonUDFs` rule is not idempotent. When it's applied twice on a query plan inside subquery, it will produce a malformed plan. It extracts Python UDF from Python exec plans. This PR proposes 2 changes to be double safe: 1. `ExtractPythonUDFs` should skip python exec plans, to make the rule idempotent 2. `ExtractPythonUDFs` should skip subquery ## How was this patch tested? a new test. Closes #23248 from cloud-fan/python. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- python/pyspark/sql/tests/test_udf.py | 52 +++++++------------ .../python/ArrowEvalPythonExec.scala | 8 ++- .../python/BatchEvalPythonExec.scala | 8 ++- .../execution/python/ExtractPythonUDFs.scala | 18 +++++-- 4 files changed, 46 insertions(+), 40 deletions(-) diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index ed298f724d55..12cf8c7de1da 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -23,7 +23,7 @@ from pyspark import SparkContext from pyspark.sql import SparkSession, Column, Row -from pyspark.sql.functions import UserDefinedFunction +from pyspark.sql.functions import UserDefinedFunction, udf from pyspark.sql.types import * from pyspark.sql.utils import AnalysisException from pyspark.testing.sqlutils import ReusedSQLTestCase, test_compiled, test_not_compiled_message @@ -102,7 +102,6 @@ def test_udf_registration_return_type_not_none(self): def test_nondeterministic_udf(self): # Test that nondeterministic UDFs are evaluated only once in chained UDF evaluations - from pyspark.sql.functions import udf import random udf_random_col = udf(lambda: int(100 * random.random()), IntegerType()).asNondeterministic() self.assertEqual(udf_random_col.deterministic, False) @@ -113,7 +112,6 @@ def test_nondeterministic_udf(self): def test_nondeterministic_udf2(self): import random - from pyspark.sql.functions import udf random_udf = udf(lambda: random.randint(6, 6), IntegerType()).asNondeterministic() self.assertEqual(random_udf.deterministic, False) random_udf1 = self.spark.catalog.registerFunction("randInt", random_udf) @@ -132,7 +130,6 @@ def test_nondeterministic_udf2(self): def test_nondeterministic_udf3(self): # regression test for SPARK-23233 - from pyspark.sql.functions import udf f = udf(lambda x: x) # Here we cache the JVM UDF instance. 
self.spark.range(1).select(f("id")) @@ -144,7 +141,7 @@ def test_nondeterministic_udf3(self): self.assertFalse(deterministic) def test_nondeterministic_udf_in_aggregate(self): - from pyspark.sql.functions import udf, sum + from pyspark.sql.functions import sum import random udf_random_col = udf(lambda: int(100 * random.random()), 'int').asNondeterministic() df = self.spark.range(10) @@ -181,7 +178,6 @@ def test_multiple_udfs(self): self.assertEqual(tuple(row), (6, 5)) def test_udf_in_filter_on_top_of_outer_join(self): - from pyspark.sql.functions import udf left = self.spark.createDataFrame([Row(a=1)]) right = self.spark.createDataFrame([Row(a=1)]) df = left.join(right, on='a', how='left_outer') @@ -190,7 +186,6 @@ def test_udf_in_filter_on_top_of_outer_join(self): def test_udf_in_filter_on_top_of_join(self): # regression test for SPARK-18589 - from pyspark.sql.functions import udf left = self.spark.createDataFrame([Row(a=1)]) right = self.spark.createDataFrame([Row(b=1)]) f = udf(lambda a, b: a == b, BooleanType()) @@ -199,7 +194,6 @@ def test_udf_in_filter_on_top_of_join(self): def test_udf_in_join_condition(self): # regression test for SPARK-25314 - from pyspark.sql.functions import udf left = self.spark.createDataFrame([Row(a=1)]) right = self.spark.createDataFrame([Row(b=1)]) f = udf(lambda a, b: a == b, BooleanType()) @@ -211,7 +205,7 @@ def test_udf_in_join_condition(self): def test_udf_in_left_outer_join_condition(self): # regression test for SPARK-26147 - from pyspark.sql.functions import udf, col + from pyspark.sql.functions import col left = self.spark.createDataFrame([Row(a=1)]) right = self.spark.createDataFrame([Row(b=1)]) f = udf(lambda a: str(a), StringType()) @@ -223,7 +217,6 @@ def test_udf_in_left_outer_join_condition(self): def test_udf_in_left_semi_join_condition(self): # regression test for SPARK-25314 - from pyspark.sql.functions import udf left = self.spark.createDataFrame([Row(a=1, a1=1, a2=1), Row(a=2, a1=2, a2=2)]) right = self.spark.createDataFrame([Row(b=1, b1=1, b2=1)]) f = udf(lambda a, b: a == b, BooleanType()) @@ -236,7 +229,6 @@ def test_udf_in_left_semi_join_condition(self): def test_udf_and_common_filter_in_join_condition(self): # regression test for SPARK-25314 # test the complex scenario with both udf and common filter - from pyspark.sql.functions import udf left = self.spark.createDataFrame([Row(a=1, a1=1, a2=1), Row(a=2, a1=2, a2=2)]) right = self.spark.createDataFrame([Row(b=1, b1=1, b2=1), Row(b=1, b1=3, b2=1)]) f = udf(lambda a, b: a == b, BooleanType()) @@ -247,7 +239,6 @@ def test_udf_and_common_filter_in_join_condition(self): def test_udf_and_common_filter_in_left_semi_join_condition(self): # regression test for SPARK-25314 # test the complex scenario with both udf and common filter - from pyspark.sql.functions import udf left = self.spark.createDataFrame([Row(a=1, a1=1, a2=1), Row(a=2, a1=2, a2=2)]) right = self.spark.createDataFrame([Row(b=1, b1=1, b2=1), Row(b=1, b1=3, b2=1)]) f = udf(lambda a, b: a == b, BooleanType()) @@ -258,7 +249,6 @@ def test_udf_and_common_filter_in_left_semi_join_condition(self): def test_udf_not_supported_in_join_condition(self): # regression test for SPARK-25314 # test python udf is not supported in join type besides left_semi and inner join. 
- from pyspark.sql.functions import udf left = self.spark.createDataFrame([Row(a=1, a1=1, a2=1), Row(a=2, a1=2, a2=2)]) right = self.spark.createDataFrame([Row(b=1, b1=1, b2=1), Row(b=1, b1=3, b2=1)]) f = udf(lambda a, b: a == b, BooleanType()) @@ -301,7 +291,7 @@ def test_broadcast_in_udf(self): def test_udf_with_filter_function(self): df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) - from pyspark.sql.functions import udf, col + from pyspark.sql.functions import col from pyspark.sql.types import BooleanType my_filter = udf(lambda a: a < 2, BooleanType()) @@ -310,7 +300,7 @@ def test_udf_with_filter_function(self): def test_udf_with_aggregate_function(self): df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) - from pyspark.sql.functions import udf, col, sum + from pyspark.sql.functions import col, sum from pyspark.sql.types import BooleanType my_filter = udf(lambda a: a == 1, BooleanType()) @@ -326,7 +316,7 @@ def test_udf_with_aggregate_function(self): self.assertEqual(sel.collect(), [Row(t=4), Row(t=3)]) def test_udf_in_generate(self): - from pyspark.sql.functions import udf, explode + from pyspark.sql.functions import explode df = self.spark.range(5) f = udf(lambda x: list(range(x)), ArrayType(LongType())) row = df.select(explode(f(*df))).groupBy().sum().first() @@ -353,7 +343,6 @@ def test_udf_in_generate(self): self.assertEqual(res[3][1], 1) def test_udf_with_order_by_and_limit(self): - from pyspark.sql.functions import udf my_copy = udf(lambda x: x, IntegerType()) df = self.spark.range(10).orderBy("id") res = df.select(df.id, my_copy(df.id).alias("copy")).limit(1) @@ -394,14 +383,14 @@ def test_non_existed_udaf(self): lambda: spark.udf.registerJavaUDAF("udaf1", "non_existed_udaf")) def test_udf_with_input_file_name(self): - from pyspark.sql.functions import udf, input_file_name + from pyspark.sql.functions import input_file_name sourceFile = udf(lambda path: path, StringType()) filePath = "python/test_support/sql/people1.json" row = self.spark.read.json(filePath).select(sourceFile(input_file_name())).first() self.assertTrue(row[0].find("people1.json") != -1) def test_udf_with_input_file_name_for_hadooprdd(self): - from pyspark.sql.functions import udf, input_file_name + from pyspark.sql.functions import input_file_name def filename(path): return path @@ -427,9 +416,6 @@ def test_udf_defers_judf_initialization(self): # This is separate of UDFInitializationTests # to avoid context initialization # when udf is called - - from pyspark.sql.functions import UserDefinedFunction - f = UserDefinedFunction(lambda x: x, StringType()) self.assertIsNone( @@ -445,8 +431,6 @@ def test_udf_defers_judf_initialization(self): ) def test_udf_with_string_return_type(self): - from pyspark.sql.functions import UserDefinedFunction - add_one = UserDefinedFunction(lambda x: x + 1, "integer") make_pair = UserDefinedFunction(lambda x: (-x, x), "struct") make_array = UserDefinedFunction( @@ -460,13 +444,11 @@ def test_udf_with_string_return_type(self): self.assertTupleEqual(expected, actual) def test_udf_shouldnt_accept_noncallable_object(self): - from pyspark.sql.functions import UserDefinedFunction - non_callable = None self.assertRaises(TypeError, UserDefinedFunction, non_callable, StringType()) def test_udf_with_decorator(self): - from pyspark.sql.functions import lit, udf + from pyspark.sql.functions import lit from pyspark.sql.types import IntegerType, DoubleType @udf(IntegerType()) @@ -523,7 +505,6 @@ def as_double(x): ) 
def test_udf_wrapper(self): - from pyspark.sql.functions import udf from pyspark.sql.types import IntegerType def f(x): @@ -569,7 +550,7 @@ def test_nonparam_udf_with_aggregate(self): # SPARK-24721 @unittest.skipIf(not test_compiled, test_not_compiled_message) def test_datasource_with_udf(self): - from pyspark.sql.functions import udf, lit, col + from pyspark.sql.functions import lit, col path = tempfile.mkdtemp() shutil.rmtree(path) @@ -609,8 +590,6 @@ def test_datasource_with_udf(self): # SPARK-25591 def test_same_accumulator_in_udfs(self): - from pyspark.sql.functions import udf - data_schema = StructType([StructField("a", IntegerType(), True), StructField("b", IntegerType(), True)]) data = self.spark.createDataFrame([[1, 2]], schema=data_schema) @@ -632,6 +611,15 @@ def second_udf(x): data.collect() self.assertEqual(test_accum.value, 101) + # SPARK-26293 + def test_udf_in_subquery(self): + f = udf(lambda x: x, "long") + with self.tempView("v"): + self.spark.range(1).filter(f("id") >= 0).createTempView("v") + sql = self.spark.sql + result = sql("select i from values(0L) as data(i) where i in (select id from v)") + self.assertEqual(result.collect(), [Row(i=0)]) + class UDFInitializationTests(unittest.TestCase): def tearDown(self): @@ -642,8 +630,6 @@ def tearDown(self): SparkContext._active_spark_context.stop() def test_udf_init_shouldnt_initialize_context(self): - from pyspark.sql.functions import UserDefinedFunction - UserDefinedFunction(lambda x: x, StringType()) self.assertIsNone( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowEvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowEvalPythonExec.scala index 2b87796dc683..a5203daea9cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowEvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowEvalPythonExec.scala @@ -60,8 +60,12 @@ private class BatchIterator[T](iter: Iterator[T], batchSize: Int) /** * A logical plan that evaluates a [[PythonUDF]]. */ -case class ArrowEvalPython(udfs: Seq[PythonUDF], output: Seq[Attribute], child: LogicalPlan) - extends UnaryNode +case class ArrowEvalPython( + udfs: Seq[PythonUDF], + output: Seq[Attribute], + child: LogicalPlan) extends UnaryNode { + override def producedAttributes: AttributeSet = AttributeSet(output.drop(child.output.length)) +} /** * A physical plan that evaluates a [[PythonUDF]]. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala index b08b7e60e130..d3736d24e501 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala @@ -32,8 +32,12 @@ import org.apache.spark.sql.types.{StructField, StructType} /** * A logical plan that evaluates a [[PythonUDF]] */ -case class BatchEvalPython(udfs: Seq[PythonUDF], output: Seq[Attribute], child: LogicalPlan) - extends UnaryNode +case class BatchEvalPython( + udfs: Seq[PythonUDF], + output: Seq[Attribute], + child: LogicalPlan) extends UnaryNode { + override def producedAttributes: AttributeSet = AttributeSet(output.drop(child.output.length)) +} /** * A physical plan that evaluates a [[PythonUDF]] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala index 90b5325919e9..380c31baa621 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala @@ -24,7 +24,7 @@ import org.apache.spark.api.python.PythonEvalType import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule @@ -131,8 +131,20 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { expressions.flatMap(collectEvaluableUDFs) } - def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { - case plan: LogicalPlan => extract(plan) + def apply(plan: LogicalPlan): LogicalPlan = plan match { + // SPARK-26293: A subquery will be rewritten into join later, and will go through this rule + // eventually. Here we skip subquery, as Python UDF only needs to be extracted once. + case _: Subquery => plan + + case _ => plan transformUp { + // A safe guard. `ExtractPythonUDFs` only runs once, so we will not hit `BatchEvalPython` and + // `ArrowEvalPython` in the input plan. However if we hit them, we must skip them, as we can't + // extract Python UDFs from them. + case p: BatchEvalPython => p + case p: ArrowEvalPython => p + + case plan: LogicalPlan => extract(plan) + } } /** From 4e1d859c19d3bfdfcb8acf915a97c68633b9ca95 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 11 Dec 2018 16:06:57 +0800 Subject: [PATCH 0110/1072] [SPARK-26303][SQL] Return partial results for bad JSON records ## What changes were proposed in this pull request? In the PR, I propose to return partial results from JSON datasource and JSON functions in the PERMISSIVE mode if some of JSON fields are parsed and converted to desired types successfully. The changes are made only for `StructType`. Whole bad JSON records are placed into the corrupt column specified by the `columnNameOfCorruptRecord` option or SQL config. Partial results are not returned for malformed JSON input. ## How was this patch tested? Added new UT which checks converting JSON strings with one invalid and one valid field at the end of the string. Closes #23253 from MaxGekk/json-bad-record. 
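Illustrative aside (not part of the patch): a usage-level sketch of the new PERMISSIVE-mode behaviour, mirroring the JsonSuite test added later in this patch; it assumes an active SparkSession `spark`.

```scala
// Assumes an active SparkSession `spark`.
import spark.implicits._

// One record with a malformed `a` but valid `b` and `c`, one with a malformed `b`.
val badRecords = Seq(
  """{"a":"-","b":[0, 1, 2],"c":"abc"}""",
  """{"a":0.1,"b":{},"c":"def"}""").toDS()

val df = spark.read
  .schema("a double, b array<int>, c string, _corrupt_record string")
  .json(badRecords)

// Since this change, the fields that did parse are kept in the row, while the
// whole malformed record still lands in the corrupt-record column.
df.show(false)
```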
Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Hyukjin Kwon --- docs/sql-migration-guide-upgrade.md | 4 +++- python/pyspark/sql/readwriter.py | 4 ++-- python/pyspark/sql/streaming.py | 4 ++-- .../sql/catalyst/json/JacksonParser.scala | 24 +++++++++++++++---- .../catalyst/util/BadRecordException.scala | 10 ++++++++ .../apache/spark/sql/DataFrameReader.scala | 16 ++++++------- .../sql/streaming/DataStreamReader.scala | 16 ++++++------- .../datasources/json/JsonSuite.scala | 15 +++++++++++- 8 files changed, 67 insertions(+), 26 deletions(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index f6458a9b2730..8834e8991d8c 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -37,7 +37,9 @@ displayTitle: Spark SQL Upgrading Guide - Since Spark 3.0, CSV datasource uses java.time API for parsing and generating CSV content. New formatting implementation supports date/timestamp patterns conformed to ISO 8601. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. - - In Spark version 2.4 and earlier, CSV datasource converts a malformed CSV string to a row with all `null`s in the PERMISSIVE mode. Since Spark 3.0, returned row can contain non-`null` fields if some of CSV column values were parsed and converted to desired types successfully. + - In Spark version 2.4 and earlier, CSV datasource converts a malformed CSV string to a row with all `null`s in the PERMISSIVE mode. Since Spark 3.0, the returned row can contain non-`null` fields if some of CSV column values were parsed and converted to desired types successfully. + + - In Spark version 2.4 and earlier, JSON datasource and JSON functions like `from_json` convert a bad JSON record to a row with all `null`s in the PERMISSIVE mode when specified schema is `StructType`. Since Spark 3.0, the returned row can contain non-`null` fields if some of JSON column values were parsed and converted to desired types successfully. ## Upgrading From Spark SQL 2.3 to 2.4 diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 1d2dd4d80893..7b10512a4329 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -211,7 +211,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, set, it uses the default value, ``PERMISSIVE``. * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets other \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ fields to ``null``. To keep corrupt records, an user can set a string type \ field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ schema does not have the field, it drops corrupt records during parsing. \ @@ -424,7 +424,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non set, it uses the default value, ``PERMISSIVE``. * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets other \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ fields to ``null``. To keep corrupt records, an user can set a string type \ field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ schema does not have the field, it drops corrupt records during parsing. 
\ diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index d92b0d5677e2..fc23b9d99c34 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -441,7 +441,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, set, it uses the default value, ``PERMISSIVE``. * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets other \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ fields to ``null``. To keep corrupt records, an user can set a string type \ field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ schema does not have the field, it drops corrupt records during parsing. \ @@ -648,7 +648,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non set, it uses the default value, ``PERMISSIVE``. * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets other \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ fields to ``null``. To keep corrupt records, an user can set a string type \ field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ schema does not have the field, it drops corrupt records during parsing. \ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 2357595906b1..7e3bd4df51bb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -22,6 +22,7 @@ import java.nio.charset.MalformedInputException import scala.collection.mutable.ArrayBuffer import scala.util.Try +import scala.util.control.NonFatal import com.fasterxml.jackson.core._ @@ -29,7 +30,6 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -347,17 +347,28 @@ class JacksonParser( schema: StructType, fieldConverters: Array[ValueConverter]): InternalRow = { val row = new GenericInternalRow(schema.length) + var badRecordException: Option[Throwable] = None + while (nextUntil(parser, JsonToken.END_OBJECT)) { schema.getFieldIndex(parser.getCurrentName) match { case Some(index) => - row.update(index, fieldConverters(index).apply(parser)) - + try { + row.update(index, fieldConverters(index).apply(parser)) + } catch { + case NonFatal(e) => + badRecordException = badRecordException.orElse(Some(e)) + parser.skipChildren() + } case None => parser.skipChildren() } } - row + if (badRecordException.isEmpty) { + row + } else { + throw PartialResultException(row, badRecordException.get) + } } /** @@ -428,6 +439,11 @@ class JacksonParser( val wrappedCharException = new CharConversionException(msg) wrappedCharException.initCause(e) throw BadRecordException(() => recordLiteral(record), () => None, wrappedCharException) + case PartialResultException(row, cause) => + throw BadRecordException( + record = () => recordLiteral(record), + partialResult = () => Some(row), + cause) } } } diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala index 985f0dc1cd60..d719a33929fc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala @@ -20,6 +20,16 @@ package org.apache.spark.sql.catalyst.util import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.unsafe.types.UTF8String +/** + * Exception thrown when the underlying parser returns a partial result of parsing. + * @param partialResult the partial result of parsing a bad record. + * @param cause the actual exception about why the parser cannot return full result. + */ +case class PartialResultException( + partialResult: InternalRow, + cause: Throwable) + extends Exception(cause) + /** * Exception thrown when the underlying parser meet a bad record and can't parse it. * @param record a function to return the record that cause the parser to fail diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 661fe98d8c90..9751528654ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -362,7 +362,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * during parsing. *
   * <li>`PERMISSIVE` : when it meets a corrupted record, puts the malformed string into a
-   * field configured by `columnNameOfCorruptRecord`, and sets other fields to `null`. To
+   * field configured by `columnNameOfCorruptRecord`, and sets malformed fields to `null`. To
   * keep corrupt records, an user can set a string type field named
   * `columnNameOfCorruptRecord` in an user-defined schema. If a schema does not have the
   * field, it drops corrupt records during parsing. When inferring a schema, it implicitly
@@ -598,13 +598,13 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   * during parsing. It supports the following case-insensitive modes.
   * <ul>
   * <li>`PERMISSIVE` : when it meets a corrupted record, puts the malformed string into a
-   * field configured by `columnNameOfCorruptRecord`, and sets other fields to `null`. To keep
-   * corrupt records, an user can set a string type field named `columnNameOfCorruptRecord`
-   * in an user-defined schema. If a schema does not have the field, it drops corrupt records
-   * during parsing. A record with less/more tokens than schema is not a corrupted record to
-   * CSV. When it meets a record having fewer tokens than the length of the schema, sets
-   * `null` to extra fields. When the record has more tokens than the length of the schema,
-   * it drops extra tokens.</li>
+   * field configured by `columnNameOfCorruptRecord`, and sets malformed fields to `null`.
+   * To keep corrupt records, an user can set a string type field named
+   * `columnNameOfCorruptRecord` in an user-defined schema. If a schema does not have
+   * the field, it drops corrupt records during parsing. A record with less/more tokens
+   * than schema is not a corrupted record to CSV. When it meets a record having fewer
+   * tokens than the length of the schema, sets `null` to extra fields. When the record
+   * has more tokens than the length of the schema, it drops extra tokens.</li>
   * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
   * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
   * </ul>
    diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index c8e3e1c19104..914fa90ae7e1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -273,7 +273,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * during parsing. *
   * <li>`PERMISSIVE` : when it meets a corrupted record, puts the malformed string into a
-   * field configured by `columnNameOfCorruptRecord`, and sets other fields to `null`. To
+   * field configured by `columnNameOfCorruptRecord`, and sets malformed fields to `null`. To
   * keep corrupt records, an user can set a string type field named
   * `columnNameOfCorruptRecord` in an user-defined schema. If a schema does not have the
   * field, it drops corrupt records during parsing. When inferring a schema, it implicitly
@@ -360,13 +360,13 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
   * during parsing. It supports the following case-insensitive modes.
   * <ul>
   * <li>`PERMISSIVE` : when it meets a corrupted record, puts the malformed string into a
-   * field configured by `columnNameOfCorruptRecord`, and sets other fields to `null`. To keep
-   * corrupt records, an user can set a string type field named `columnNameOfCorruptRecord`
-   * in an user-defined schema. If a schema does not have the field, it drops corrupt records
-   * during parsing. A record with less/more tokens than schema is not a corrupted record to
-   * CSV. When it meets a record having fewer tokens than the length of the schema, sets
-   * `null` to extra fields. When the record has more tokens than the length of the schema,
-   * it drops extra tokens.</li>
+   * field configured by `columnNameOfCorruptRecord`, and sets malformed fields to `null`.
+   * To keep corrupt records, an user can set a string type field named
+   * `columnNameOfCorruptRecord` in an user-defined schema. If a schema does not have
+   * the field, it drops corrupt records during parsing. A record with less/more tokens
+   * than schema is not a corrupted record to CSV. When it meets a record having fewer
+   * tokens than the length of the schema, sets `null` to extra fields. When the record
+   * has more tokens than the length of the schema, it drops extra tokens.</li>
   * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
   * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
   * </ul>
      diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index dff37ca2d40f..3330de3584eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -248,7 +248,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { checkAnswer( sql("select nullstr, headers.Host from jsonTable"), - Seq(Row("", "1.abc.com"), Row("", null), Row(null, null), Row(null, null)) + Seq(Row("", "1.abc.com"), Row("", null), Row("", null), Row(null, null)) ) } @@ -2563,4 +2563,17 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { assert(!files.exists(_.getName.endsWith("json"))) } } + + test("return partial result for bad records") { + val schema = "a double, b array, c string, _corrupt_record string" + val badRecords = Seq( + """{"a":"-","b":[0, 1, 2],"c":"abc"}""", + """{"a":0.1,"b":{},"c":"def"}""").toDS() + val df = spark.read.schema(schema).json(badRecords) + + checkAnswer( + df, + Row(null, Array(0, 1, 2), "abc", """{"a":"-","b":[0, 1, 2],"c":"abc"}""") :: + Row(0.1, null, "def", """{"a":0.1,"b":{},"c":"def"}""") :: Nil) + } } From bd7df6b1e129741136d09a3d29f9ffcc32ce1de3 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Tue, 11 Dec 2018 18:47:21 +0800 Subject: [PATCH 0111/1072] [SPARK-26327][SQL] Bug fix for `FileSourceScanExec` metrics update and name changing ## What changes were proposed in this pull request? As the description in [SPARK-26327](https://issues.apache.org/jira/browse/SPARK-26327), `postDriverMetricUpdates` was called on wrong place cause this bug, fix this by split the initializing of `selectedPartitions` and metrics updating logic. Add the updating logic in `inputRDD` initializing which can take effect in both code generation node and normal node. Also rename `metadataTime` to `fileListingTime` for clearer meaning. ## How was this patch tested? New test case in `SQLMetricsSuite`. Manual test: | | Before | After | |---------|:--------:|:-------:| | CodeGen |![image](https://user-images.githubusercontent.com/4833765/49741753-13c7e800-fcd2-11e8-97a8-8057b657aa3c.png)|![image](https://user-images.githubusercontent.com/4833765/49741774-1f1b1380-fcd2-11e8-98d9-78b950f4e43a.png)| | Normal |![image](https://user-images.githubusercontent.com/4833765/49741836-378b2e00-fcd2-11e8-80c3-ab462a6a3184.png)|![image](https://user-images.githubusercontent.com/4833765/49741860-4a056780-fcd2-11e8-9ef1-863de217f183.png)| Closes #23277 from xuanyuanking/SPARK-26327. 
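Illustrative aside (not part of the patch): a sketch of how the renamed metric could be inspected from a query's executed plan. The table name and predicate follow the new SQLMetricsSuite test, and the metric keys come from the updated FileSourceScanExec; everything else is an assumption for illustration.

```scala
import org.apache.spark.sql.execution.FileSourceScanExec

// Assumes an active SparkSession `spark`, as in the new SQLMetricsSuite test.
spark.range(10).selectExpr("id", "id % 3 as p")
  .write.partitionBy("p").saveAsTable("testDataForScan")

val df = spark.sql("SELECT * FROM testDataForScan WHERE p = 1")
df.collect()

// After this change the driver-side metrics are posted when inputRDD is
// initialized, so they are populated for both code-generated and normal plans.
val scan = df.queryExecution.executedPlan.collectFirst {
  case s: FileSourceScanExec => s
}.get
println(scan.metrics("numFiles").value)        // number of files read
println(scan.metrics("fileListingTime").value) // renamed from metadataTime
```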
Authored-by: Yuanjian Li Signed-off-by: Wenchen Fan --- .../sql/execution/DataSourceScanExec.scala | 28 +++++++++++++------ .../execution/metric/SQLMetricsSuite.scala | 15 ++++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index b29d5c76c5f3..c0fa4e777b49 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -167,19 +167,14 @@ case class FileSourceScanExec( partitionSchema = relation.partitionSchema, relation.sparkSession.sessionState.conf) + private var fileListingTime = 0L + @transient private lazy val selectedPartitions: Seq[PartitionDirectory] = { val optimizerMetadataTimeNs = relation.location.metadataOpsTimeNs.getOrElse(0L) val startTime = System.nanoTime() val ret = relation.location.listFiles(partitionFilters, dataFilters) val timeTakenMs = ((System.nanoTime() - startTime) + optimizerMetadataTimeNs) / 1000 / 1000 - - metrics("numFiles").add(ret.map(_.files.size.toLong).sum) - metrics("metadataTime").add(timeTakenMs) - - val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) - SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, - metrics("numFiles") :: metrics("metadataTime") :: Nil) - + fileListingTime = timeTakenMs ret } @@ -291,6 +286,8 @@ case class FileSourceScanExec( } private lazy val inputRDD: RDD[InternalRow] = { + // Update metrics for taking effect in both code generation node and normal node. + updateDriverMetrics() val readFile: (PartitionedFile) => Iterator[InternalRow] = relation.fileFormat.buildReaderWithPartitionValues( sparkSession = relation.sparkSession, @@ -316,7 +313,7 @@ case class FileSourceScanExec( override lazy val metrics = Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), "numFiles" -> SQLMetrics.createMetric(sparkContext, "number of files"), - "metadataTime" -> SQLMetrics.createMetric(sparkContext, "metadata time (ms)"), + "fileListingTime" -> SQLMetrics.createMetric(sparkContext, "file listing time (ms)"), "scanTime" -> SQLMetrics.createTimingMetric(sparkContext, "scan time")) protected override def doExecute(): RDD[InternalRow] = { @@ -507,6 +504,19 @@ case class FileSourceScanExec( } } + /** + * Send the updated metrics to driver, while this function calling, selectedPartitions has + * been initialized. See SPARK-26327 for more detail. 
+ */ + private def updateDriverMetrics() = { + metrics("numFiles").add(selectedPartitions.map(_.files.size.toLong).sum) + metrics("fileListingTime").add(fileListingTime) + + val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, + metrics("numFiles") :: metrics("fileListingTime") :: Nil) + } + override def doCanonicalize(): FileSourceScanExec = { FileSourceScanExec( relation, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 2251607e76af..4a80638f6885 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -636,4 +636,19 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared assert(filters.head.metrics("numOutputRows").value == 1) } } + + test("SPARK-26327: FileSourceScanExec metrics") { + withTable("testDataForScan") { + spark.range(10).selectExpr("id", "id % 3 as p") + .write.partitionBy("p").saveAsTable("testDataForScan") + // The execution plan only has 1 FileScan node. + val df = spark.sql( + "SELECT * FROM testDataForScan WHERE p = 1") + testSparkPlanMetrics(df, 1, Map( + 0L -> (("Scan parquet default.testdataforscan", Map( + "number of output rows" -> 3L, + "number of files" -> 2L)))) + ) + } + } } From a3bbca98d7d120f22727a55cdc448608e6bb9fad Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 11 Dec 2018 21:08:39 +0800 Subject: [PATCH 0112/1072] [SPARK-26265][CORE] Fix deadlock in BytesToBytesMap.MapIterator when locking both BytesToBytesMap.MapIterator and TaskMemoryManager ## What changes were proposed in this pull request? In `BytesToBytesMap.MapIterator.advanceToNextPage`, We will first lock this `MapIterator` and then `TaskMemoryManager` when going to free a memory page by calling `freePage`. At the same time, it is possibly that another memory consumer first locks `TaskMemoryManager` and then this `MapIterator` when it acquires memory and causes spilling on this `MapIterator`. So it ends with the `MapIterator` object holds lock to the `MapIterator` object and waits for lock on `TaskMemoryManager`, and the other consumer holds lock to `TaskMemoryManager` and waits for lock on the `MapIterator` object. To avoid deadlock here, this patch proposes to keep reference to the page to free and free it after releasing the lock of `MapIterator`. ## How was this patch tested? Added test and manually test by running the test 100 times to make sure there is no deadlock. Closes #23272 from viirya/SPARK-26265. 
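Illustrative aside (not part of the patch): a small self-contained sketch of the lock-ordering deadlock described above and of the fix pattern applied here. The lock objects are invented stand-ins for the `MapIterator` and `TaskMemoryManager` monitors.

```scala
// Thread A locks the iterator, then needs the memory manager (freePage).
// Thread B locks the memory manager, then needs the iterator (spill).
// Holding the first lock while acquiring the second in opposite orders deadlocks.
object LockOrderingSketch {
  private val iteratorLock = new Object
  private val memoryManagerLock = new Object

  // Deadlock-prone shape: frees the page while still holding the iterator lock.
  def advanceDeadlockProne(): Unit = iteratorLock.synchronized {
    memoryManagerLock.synchronized { /* freePage(currentPage) */ }
  }

  // Fix pattern: decide what to free under the iterator lock, release it,
  // and only then take the memory-manager lock.
  def advanceSafe(): Unit = {
    var pageToFree: Option[AnyRef] = None
    iteratorLock.synchronized {
      pageToFree = Some(new Object) // stand-in for the MemoryBlock being dropped
    }
    pageToFree.foreach { page =>
      memoryManagerLock.synchronized { /* freePage(page) */ }
    }
  }
}
```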
Authored-by: Liang-Chi Hsieh Signed-off-by: Wenchen Fan --- .../spark/unsafe/map/BytesToBytesMap.java | 86 ++++++++++--------- .../spark/memory/TestMemoryConsumer.java | 4 +- .../map/AbstractBytesToBytesMapSuite.java | 47 ++++++++++ 3 files changed, 96 insertions(+), 41 deletions(-) diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index 405e52946415..fbba002f1f80 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -255,11 +255,18 @@ private MapIterator(int numRecords, Location loc, boolean destructive) { } private void advanceToNextPage() { + // SPARK-26265: We will first lock this `MapIterator` and then `TaskMemoryManager` when going + // to free a memory page by calling `freePage`. At the same time, it is possibly that another + // memory consumer first locks `TaskMemoryManager` and then this `MapIterator` when it + // acquires memory and causes spilling on this `MapIterator`. To avoid deadlock here, we keep + // reference to the page to free and free it after releasing the lock of `MapIterator`. + MemoryBlock pageToFree = null; + synchronized (this) { int nextIdx = dataPages.indexOf(currentPage) + 1; if (destructive && currentPage != null) { dataPages.remove(currentPage); - freePage(currentPage); + pageToFree = currentPage; nextIdx --; } if (dataPages.size() > nextIdx) { @@ -283,6 +290,9 @@ private void advanceToNextPage() { } } } + if (pageToFree != null) { + freePage(pageToFree); + } } @Override @@ -329,52 +339,50 @@ public Location next() { } } - public long spill(long numBytes) throws IOException { - synchronized (this) { - if (!destructive || dataPages.size() == 1) { - return 0L; - } + public synchronized long spill(long numBytes) throws IOException { + if (!destructive || dataPages.size() == 1) { + return 0L; + } - updatePeakMemoryUsed(); + updatePeakMemoryUsed(); - // TODO: use existing ShuffleWriteMetrics - ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); + // TODO: use existing ShuffleWriteMetrics + ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); - long released = 0L; - while (dataPages.size() > 0) { - MemoryBlock block = dataPages.getLast(); - // The currentPage is used, cannot be released - if (block == currentPage) { - break; - } + long released = 0L; + while (dataPages.size() > 0) { + MemoryBlock block = dataPages.getLast(); + // The currentPage is used, cannot be released + if (block == currentPage) { + break; + } - Object base = block.getBaseObject(); - long offset = block.getBaseOffset(); - int numRecords = UnsafeAlignedOffset.getSize(base, offset); - int uaoSize = UnsafeAlignedOffset.getUaoSize(); - offset += uaoSize; - final UnsafeSorterSpillWriter writer = - new UnsafeSorterSpillWriter(blockManager, 32 * 1024, writeMetrics, numRecords); - while (numRecords > 0) { - int length = UnsafeAlignedOffset.getSize(base, offset); - writer.write(base, offset + uaoSize, length, 0); - offset += uaoSize + length + 8; - numRecords--; - } - writer.close(); - spillWriters.add(writer); + Object base = block.getBaseObject(); + long offset = block.getBaseOffset(); + int numRecords = UnsafeAlignedOffset.getSize(base, offset); + int uaoSize = UnsafeAlignedOffset.getUaoSize(); + offset += uaoSize; + final UnsafeSorterSpillWriter writer = + new UnsafeSorterSpillWriter(blockManager, 32 * 1024, writeMetrics, numRecords); + while (numRecords > 0) { + int length = 
UnsafeAlignedOffset.getSize(base, offset); + writer.write(base, offset + uaoSize, length, 0); + offset += uaoSize + length + 8; + numRecords--; + } + writer.close(); + spillWriters.add(writer); - dataPages.removeLast(); - released += block.size(); - freePage(block); + dataPages.removeLast(); + released += block.size(); + freePage(block); - if (released >= numBytes) { - break; - } + if (released >= numBytes) { + break; } - - return released; } + + return released; } @Override diff --git a/core/src/test/java/org/apache/spark/memory/TestMemoryConsumer.java b/core/src/test/java/org/apache/spark/memory/TestMemoryConsumer.java index 0bbaea6b834b..6aa577d1bf79 100644 --- a/core/src/test/java/org/apache/spark/memory/TestMemoryConsumer.java +++ b/core/src/test/java/org/apache/spark/memory/TestMemoryConsumer.java @@ -38,12 +38,12 @@ public long spill(long size, MemoryConsumer trigger) throws IOException { return used; } - void use(long size) { + public void use(long size) { long got = taskMemoryManager.acquireExecutionMemory(size, this); used += got; } - void free(long size) { + public void free(long size) { used -= size; taskMemoryManager.releaseExecutionMemory(size, this); } diff --git a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java index a11cd535b547..e5fbafc23d95 100644 --- a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java +++ b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java @@ -33,6 +33,8 @@ import org.apache.spark.SparkConf; import org.apache.spark.executor.ShuffleWriteMetrics; +import org.apache.spark.memory.MemoryMode; +import org.apache.spark.memory.TestMemoryConsumer; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.memory.TestMemoryManager; import org.apache.spark.network.util.JavaUtils; @@ -678,4 +680,49 @@ public void testPeakMemoryUsed() { } } + @Test + public void avoidDeadlock() throws InterruptedException { + memoryManager.limit(PAGE_SIZE_BYTES); + MemoryMode mode = useOffHeapMemoryAllocator() ? MemoryMode.OFF_HEAP: MemoryMode.ON_HEAP; + TestMemoryConsumer c1 = new TestMemoryConsumer(taskMemoryManager, mode); + BytesToBytesMap map = + new BytesToBytesMap(taskMemoryManager, blockManager, serializerManager, 1, 0.5, 1024); + + Thread thread = new Thread(() -> { + int i = 0; + long used = 0; + while (i < 10) { + c1.use(10000000); + used += 10000000; + i++; + } + c1.free(used); + }); + + try { + int i; + for (i = 0; i < 1024; i++) { + final long[] arr = new long[]{i}; + final BytesToBytesMap.Location loc = map.lookup(arr, Platform.LONG_ARRAY_OFFSET, 8); + loc.append(arr, Platform.LONG_ARRAY_OFFSET, 8, arr, Platform.LONG_ARRAY_OFFSET, 8); + } + + // Starts to require memory at another memory consumer. 
+ thread.start(); + + BytesToBytesMap.MapIterator iter = map.destructiveIterator(); + for (i = 0; i < 1024; i++) { + iter.next(); + } + assertFalse(iter.hasNext()); + } finally { + map.free(); + thread.join(); + for (File spillFile : spillFilesCreated) { + assertFalse("Spill file " + spillFile.getPath() + " was not cleaned up", + spillFile.exists()); + } + } + } + } From 5c67a9a7fa29836fc825504bbcc3c3fc820009c6 Mon Sep 17 00:00:00 2001 From: jiake Date: Tue, 11 Dec 2018 21:23:27 +0800 Subject: [PATCH 0113/1072] [SPARK-26316][SPARK-21052] Revert hash join metrics in that causes performance degradation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? The wrong implementation in the hash join metrics in [spark 21052](https://issues.apache.org/jira/browse/SPARK-21052) caused significant performance degradation in TPC-DS. And the result is [here](https://docs.google.com/spreadsheets/d/18a5BdOlmm8euTaRodyeWum9yu92mbWWu6JbhGXtr7yE/edit#gid=0) in TPC-DS 1TB scale. So we currently partial revert 21052. **Cluster info:**   | Master Node | Worker Nodes -- | -- | -- Node | 1x | 4x Processor | Intel(R) Xeon(R) Platinum 8170 CPU 2.10GHz | Intel(R) Xeon(R) Platinum 8180 CPU 2.50GHz Memory | 192 GB | 384 GB Storage Main | 8 x 960G SSD | 8 x 960G SSD Network | 10Gbe |   Role | CM Management NameNodeSecondary NameNodeResource ManagerHive Metastore Server | DataNodeNodeManager OS Version | CentOS 7.2 | CentOS 7.2 Hadoop | Apache Hadoop 2.7.5 | Apache Hadoop 2.7.5 Hive | Apache Hive 2.2.0 |   Spark | Apache Spark 2.1.0  & Apache Spark2.3.0 |   JDK  version | 1.8.0_112 | 1.8.0_112 **Related parameters setting:** Component | Parameter | Value -- | -- | -- Yarn Resource Manager | yarn.scheduler.maximum-allocation-mb | 120GB   | yarn.scheduler.minimum-allocation-mb | 1GB   | yarn.scheduler.maximum-allocation-vcores | 121   | Yarn.resourcemanager.scheduler.class | Fair Scheduler Yarn Node Manager | yarn.nodemanager.resource.memory-mb | 120GB   | yarn.nodemanager.resource.cpu-vcores | 121 Spark | spark.executor.memory | 110GB   | spark.executor.cores | 50 ## How was this patch tested? N/A Closes #23269 from JkSelf/partial-revert-21052. Authored-by: jiake Signed-off-by: Wenchen Fan --- .../joins/BroadcastHashJoinExec.scala | 28 +----- .../spark/sql/execution/joins/HashJoin.scala | 8 +- .../sql/execution/joins/HashedRelation.scala | 35 ------- .../joins/ShuffledHashJoinExec.scala | 6 +- .../execution/metric/SQLMetricsSuite.scala | 94 +------------------ 5 files changed, 6 insertions(+), 165 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala index a6f3ea47c849..fd4a7897c7ad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.plans.physical.{BroadcastDistribution, Dist import org.apache.spark.sql.execution.{BinaryExecNode, CodegenSupport, SparkPlan} import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.sql.types.{BooleanType, LongType} -import org.apache.spark.util.TaskCompletionListener /** * Performs an inner hash join of two child relations. 
When the output RDD of this operator is @@ -48,8 +47,7 @@ case class BroadcastHashJoinExec( extends BinaryExecNode with HashJoin with CodegenSupport { override lazy val metrics = Map( - "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), - "avgHashProbe" -> SQLMetrics.createAverageMetric(sparkContext, "avg hash probe")) + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) override def requiredChildDistribution: Seq[Distribution] = { val mode = HashedRelationBroadcastMode(buildKeys) @@ -63,13 +61,12 @@ case class BroadcastHashJoinExec( protected override def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") - val avgHashProbe = longMetric("avgHashProbe") val broadcastRelation = buildPlan.executeBroadcast[HashedRelation]() streamedPlan.execute().mapPartitions { streamedIter => val hashed = broadcastRelation.value.asReadOnlyCopy() TaskContext.get().taskMetrics().incPeakExecutionMemory(hashed.estimatedSize) - join(streamedIter, hashed, numOutputRows, avgHashProbe) + join(streamedIter, hashed, numOutputRows) } } @@ -111,23 +108,6 @@ case class BroadcastHashJoinExec( } } - /** - * Returns the codes used to add a task completion listener to update avg hash probe - * at the end of the task. - */ - private def genTaskListener(avgHashProbe: String, relationTerm: String): String = { - val listenerClass = classOf[TaskCompletionListener].getName - val taskContextClass = classOf[TaskContext].getName - s""" - | $taskContextClass$$.MODULE$$.get().addTaskCompletionListener(new $listenerClass() { - | @Override - | public void onTaskCompletion($taskContextClass context) { - | $avgHashProbe.set($relationTerm.getAverageProbesPerLookup()); - | } - | }); - """.stripMargin - } - /** * Returns a tuple of Broadcast of HashedRelation and the variable name for it. */ @@ -137,15 +117,11 @@ case class BroadcastHashJoinExec( val broadcast = ctx.addReferenceObj("broadcast", broadcastRelation) val clsName = broadcastRelation.value.getClass.getName - // At the end of the task, we update the avg hash probe. 
- val avgHashProbe = metricTerm(ctx, "avgHashProbe") - // Inline mutable state since not many join operations in a task val relationTerm = ctx.addMutableState(clsName, "relation", v => s""" | $v = (($clsName) $broadcast.value()).asReadOnlyCopy(); | incPeakExecutionMemory($v.estimatedSize()); - | ${genTaskListener(avgHashProbe, v)} """.stripMargin, forceInline = true) (broadcastRelation, relationTerm) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala index dab873bf9b9a..1aef5f686426 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.joins -import org.apache.spark.TaskContext import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ @@ -194,8 +193,7 @@ trait HashJoin { protected def join( streamedIter: Iterator[InternalRow], hashed: HashedRelation, - numOutputRows: SQLMetric, - avgHashProbe: SQLMetric): Iterator[InternalRow] = { + numOutputRows: SQLMetric): Iterator[InternalRow] = { val joinedIter = joinType match { case _: InnerLike => @@ -213,10 +211,6 @@ trait HashJoin { s"BroadcastHashJoin should not take $x as the JoinType") } - // At the end of the task, we update the avg hash probe. - TaskContext.get().addTaskCompletionListener[Unit](_ => - avgHashProbe.set(hashed.getAverageProbesPerLookup)) - val resultProj = createResultProjection joinedIter.map { r => numOutputRows += 1 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index b1ff6e83acc2..7c21062c4cec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -80,11 +80,6 @@ private[execution] sealed trait HashedRelation extends KnownSizeEstimation { * Release any used resources. */ def close(): Unit - - /** - * Returns the average number of probes per key lookup. - */ - def getAverageProbesPerLookup: Double } private[execution] object HashedRelation { @@ -279,8 +274,6 @@ private[joins] class UnsafeHashedRelation( override def read(kryo: Kryo, in: Input): Unit = Utils.tryOrIOException { read(() => in.readInt(), () => in.readLong(), in.readBytes) } - - override def getAverageProbesPerLookup: Double = binaryMap.getAverageProbesPerLookup } private[joins] object UnsafeHashedRelation { @@ -395,10 +388,6 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap // The number of unique keys. private var numKeys = 0L - // Tracking average number of probes per key lookup. 
- private var numKeyLookups = 0L - private var numProbes = 0L - // needed by serializer def this() = { this( @@ -483,8 +472,6 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap */ def getValue(key: Long, resultRow: UnsafeRow): UnsafeRow = { if (isDense) { - numKeyLookups += 1 - numProbes += 1 if (key >= minKey && key <= maxKey) { val value = array((key - minKey).toInt) if (value > 0) { @@ -493,14 +480,11 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap } } else { var pos = firstSlot(key) - numKeyLookups += 1 - numProbes += 1 while (array(pos + 1) != 0) { if (array(pos) == key) { return getRow(array(pos + 1), resultRow) } pos = nextSlot(pos) - numProbes += 1 } } null @@ -528,8 +512,6 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap */ def get(key: Long, resultRow: UnsafeRow): Iterator[UnsafeRow] = { if (isDense) { - numKeyLookups += 1 - numProbes += 1 if (key >= minKey && key <= maxKey) { val value = array((key - minKey).toInt) if (value > 0) { @@ -538,14 +520,11 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap } } else { var pos = firstSlot(key) - numKeyLookups += 1 - numProbes += 1 while (array(pos + 1) != 0) { if (array(pos) == key) { return valueIter(array(pos + 1), resultRow) } pos = nextSlot(pos) - numProbes += 1 } } null @@ -585,11 +564,8 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap private def updateIndex(key: Long, address: Long): Unit = { var pos = firstSlot(key) assert(numKeys < array.length / 2) - numKeyLookups += 1 - numProbes += 1 while (array(pos) != key && array(pos + 1) != 0) { pos = nextSlot(pos) - numProbes += 1 } if (array(pos + 1) == 0) { // this is the first value for this key, put the address in array. @@ -721,8 +697,6 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap writeLong(maxKey) writeLong(numKeys) writeLong(numValues) - writeLong(numKeyLookups) - writeLong(numProbes) writeLong(array.length) writeLongArray(writeBuffer, array, array.length) @@ -764,8 +738,6 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap maxKey = readLong() numKeys = readLong() numValues = readLong() - numKeyLookups = readLong() - numProbes = readLong() val length = readLong().toInt mask = length - 2 @@ -783,11 +755,6 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap override def read(kryo: Kryo, in: Input): Unit = { read(() => in.readBoolean(), () => in.readLong(), in.readBytes) } - - /** - * Returns the average number of probes per key lookup. 
- */ - def getAverageProbesPerLookup: Double = numProbes.toDouble / numKeyLookups } private[joins] class LongHashedRelation( @@ -839,8 +806,6 @@ private[joins] class LongHashedRelation( resultRow = new UnsafeRow(nFields) map = in.readObject().asInstanceOf[LongToUnsafeRowMap] } - - override def getAverageProbesPerLookup: Double = map.getAverageProbesPerLookup } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala index 2b59ed6e4d16..524804d61e59 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala @@ -42,8 +42,7 @@ case class ShuffledHashJoinExec( override lazy val metrics = Map( "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), "buildDataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size of build side"), - "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build hash map"), - "avgHashProbe" -> SQLMetrics.createAverageMetric(sparkContext, "avg hash probe")) + "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build hash map")) override def requiredChildDistribution: Seq[Distribution] = HashClusteredDistribution(leftKeys) :: HashClusteredDistribution(rightKeys) :: Nil @@ -63,10 +62,9 @@ case class ShuffledHashJoinExec( protected override def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") - val avgHashProbe = longMetric("avgHashProbe") streamedPlan.execute().zipPartitions(buildPlan.execute()) { (streamIter, buildIter) => val hashed = buildHashedRelation(buildIter) - join(streamIter, hashed, numOutputRows, avgHashProbe) + join(streamIter, hashed, numOutputRows) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 4a80638f6885..f6495496a58e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -261,50 +261,6 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared ) } - test("BroadcastHashJoin metrics: track avg probe") { - // The executed plan looks like: - // Project [a#210, b#211, b#221] - // +- BroadcastHashJoin [a#210], [a#220], Inner, BuildRight - // :- Project [_1#207 AS a#210, _2#208 AS b#211] - // : +- Filter isnotnull(_1#207) - // : +- LocalTableScan [_1#207, _2#208] - // +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, binary, true])) - // +- Project [_1#217 AS a#220, _2#218 AS b#221] - // +- Filter isnotnull(_1#217) - // +- LocalTableScan [_1#217, _2#218] - // - // Assume the execution plan with node id is - // WholeStageCodegen disabled: - // Project(nodeId = 0) - // BroadcastHashJoin(nodeId = 1) - // ...(ignored) - // - // WholeStageCodegen enabled: - // WholeStageCodegen(nodeId = 0) - // Project(nodeId = 1) - // BroadcastHashJoin(nodeId = 2) - // Project(nodeId = 3) - // Filter(nodeId = 4) - // ...(ignored) - Seq(true, false).foreach { enableWholeStage => - val df1 = generateRandomBytesDF() - val df2 = generateRandomBytesDF() - val df = df1.join(broadcast(df2), "a") - val nodeIds = if (enableWholeStage) { - Set(2L) - } else { - Set(1L) - } - val metrics = 
getSparkPlanMetrics(df, 2, nodeIds, enableWholeStage).get - nodeIds.foreach { nodeId => - val probes = metrics(nodeId)._2("avg hash probe (min, med, max)") - probes.toString.stripPrefix("\n(").stripSuffix(")").split(", ").foreach { probe => - assert(probe.toDouble > 1.0) - } - } - } - } - test("ShuffledHashJoin metrics") { withSQLConf("spark.sql.autoBroadcastJoinThreshold" -> "40", "spark.sql.shuffle.partitions" -> "2", @@ -323,8 +279,7 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared val df = df1.join(df2, "key") testSparkPlanMetrics(df, 1, Map( 1L -> (("ShuffledHashJoin", Map( - "number of output rows" -> 2L, - "avg hash probe (min, med, max)" -> "\n(1, 1, 1)"))), + "number of output rows" -> 2L))), 2L -> (("Exchange", Map( "shuffle records written" -> 2L, "records read" -> 2L))), @@ -335,53 +290,6 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared } } - test("ShuffledHashJoin metrics: track avg probe") { - // The executed plan looks like: - // Project [a#308, b#309, b#319] - // +- ShuffledHashJoin [a#308], [a#318], Inner, BuildRight - // :- Exchange hashpartitioning(a#308, 2) - // : +- Project [_1#305 AS a#308, _2#306 AS b#309] - // : +- Filter isnotnull(_1#305) - // : +- LocalTableScan [_1#305, _2#306] - // +- Exchange hashpartitioning(a#318, 2) - // +- Project [_1#315 AS a#318, _2#316 AS b#319] - // +- Filter isnotnull(_1#315) - // +- LocalTableScan [_1#315, _2#316] - // - // Assume the execution plan with node id is - // WholeStageCodegen disabled: - // Project(nodeId = 0) - // ShuffledHashJoin(nodeId = 1) - // ...(ignored) - // - // WholeStageCodegen enabled: - // WholeStageCodegen(nodeId = 0) - // Project(nodeId = 1) - // ShuffledHashJoin(nodeId = 2) - // ...(ignored) - withSQLConf("spark.sql.autoBroadcastJoinThreshold" -> "5000000", - "spark.sql.shuffle.partitions" -> "2", - "spark.sql.join.preferSortMergeJoin" -> "false") { - Seq(true, false).foreach { enableWholeStage => - val df1 = generateRandomBytesDF(65535 * 5) - val df2 = generateRandomBytesDF(65535) - val df = df1.join(df2, "a") - val nodeIds = if (enableWholeStage) { - Set(2L) - } else { - Set(1L) - } - val metrics = getSparkPlanMetrics(df, 1, nodeIds, enableWholeStage).get - nodeIds.foreach { nodeId => - val probes = metrics(nodeId)._2("avg hash probe (min, med, max)") - probes.toString.stripPrefix("\n(").stripSuffix(")").split(", ").foreach { probe => - assert(probe.toDouble > 1.0) - } - } - } - } - } - test("BroadcastHashJoin(outer) metrics") { val df1 = Seq((1, "a"), (1, "b"), (4, "c")).toDF("key", "value") val df2 = Seq((1, "a"), (1, "b"), (2, "c"), (3, "d")).toDF("key2", "value") From d811369ce23186cbb3208ad665e15408e13fea87 Mon Sep 17 00:00:00 2001 From: liuxian Date: Tue, 11 Dec 2018 09:12:17 -0800 Subject: [PATCH 0114/1072] [SPARK-26300][SS] Remove a redundant `checkForStreaming` call ## What changes were proposed in this pull request? If `checkForContinuous` is called ( `checkForStreaming` is called in `checkForContinuous` ), the `checkForStreaming` mothod will be called twice in `createQuery` , this is not necessary, and the `checkForStreaming` method has a lot of statements, so it's better to remove one of them. ## How was this patch tested? Existing unit tests in `StreamingQueryManagerSuite` and `ContinuousAggregationSuite` Closes #23251 from 10110346/isUnsupportedOperationCheckEnabled. 
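(Editor's illustration.) The control-flow change described above can be summarized with a small, self-contained toy. The method names `createQuery`, `checkForStreaming`, and `checkForContinuous` echo the patch, but everything else here is hypothetical and is not the real `StreamingQueryManager` API:

```scala
// Toy sketch of the intended behavior: the unsupported-operation check runs exactly
// once per query, on the branch that needs it, instead of once up front plus once
// more inside the continuous check.
object SingleCheckSketch {
  sealed trait Trigger
  case object Continuous extends Trigger
  case object MicroBatch extends Trigger

  var checksRun = 0
  def checkForStreaming(plan: String): Unit = { checksRun += 1 }
  // Mirrors the real checker's structure: the continuous check subsumes the streaming check.
  def checkForContinuous(plan: String): Unit = { checkForStreaming(plan) }

  def createQuery(plan: String, trigger: Trigger, checkEnabled: Boolean): Unit =
    trigger match {
      case Continuous => if (checkEnabled) checkForContinuous(plan)
      case MicroBatch => if (checkEnabled) checkForStreaming(plan)
    }

  def main(args: Array[String]): Unit = {
    createQuery("analyzedPlan", Continuous, checkEnabled = true)
    assert(checksRun == 1) // before the change this branch effectively ran the streaming check twice
  }
}
```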
Authored-by: liuxian Signed-off-by: Dongjoon Hyun --- .../spark/sql/streaming/StreamingQueryManager.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index d9fe1a992a09..881cd96cc9dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -246,9 +246,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo val analyzedPlan = df.queryExecution.analyzed df.queryExecution.assertAnalyzed() - if (sparkSession.sessionState.conf.isUnsupportedOperationCheckEnabled) { - UnsupportedOperationChecker.checkForStreaming(analyzedPlan, outputMode) - } + val operationCheckEnabled = sparkSession.sessionState.conf.isUnsupportedOperationCheckEnabled if (sparkSession.sessionState.conf.adaptiveExecutionEnabled) { logWarning(s"${SQLConf.ADAPTIVE_EXECUTION_ENABLED.key} " + @@ -257,7 +255,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo (sink, trigger) match { case (v2Sink: StreamingWriteSupportProvider, trigger: ContinuousTrigger) => - if (sparkSession.sessionState.conf.isUnsupportedOperationCheckEnabled) { + if (operationCheckEnabled) { UnsupportedOperationChecker.checkForContinuous(analyzedPlan, outputMode) } new StreamingQueryWrapper(new ContinuousExecution( @@ -272,6 +270,9 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo extraOptions, deleteCheckpointOnStop)) case _ => + if (operationCheckEnabled) { + UnsupportedOperationChecker.checkForStreaming(analyzedPlan, outputMode) + } new StreamingQueryWrapper(new MicroBatchExecution( sparkSession, userSpecifiedName.orNull, From 57d6fbfa8c803ce1791e7be36aba0219a1fcaa63 Mon Sep 17 00:00:00 2001 From: mcheah Date: Tue, 11 Dec 2018 13:49:52 -0800 Subject: [PATCH 0115/1072] [SPARK-26239] File-based secret key loading for SASL. This proposes an alternative way to load secret keys into a Spark application that is running on Kubernetes. Instead of automatically generating the secret, the secret key can reside in a file that is shared between both the driver and executor containers. Unit tests. Closes #23252 from mccheah/auth-secret-with-file. 
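(Editor's illustration.) A minimal, standalone sketch of the file-based secret loading introduced in `SecurityManager` below. The base64 encoding of the file contents and the Kubernetes-only guard follow the patch; the object and method names here are invented for the example:

```scala
import java.io.File
import java.nio.file.Files
import java.util.Base64

// Sketch: load a shared secret from a mounted file and normalize it to a base64
// string, allowing this path only when the master URL indicates Kubernetes.
object FileSecretSketch {
  private val k8sMaster = "k8s.*".r

  def loadSecret(master: String, secretFilePath: String): String = master match {
    case k8sMaster() =>
      val secretFile = new File(secretFilePath)
      require(secretFile.isFile, s"No file found containing the secret key at $secretFilePath.")
      val key = Base64.getEncoder.encodeToString(Files.readAllBytes(secretFile.toPath))
      require(key.nonEmpty, s"Secret key from file located at $secretFilePath is empty.")
      key
    case _ =>
      throw new IllegalArgumentException(
        "Secret keys provided via files is only allowed in Kubernetes mode.")
  }

  def main(args: Array[String]): Unit = {
    val tmp = Files.createTempFile("spark-secret", ".txt")
    Files.write(tmp, "test-secret".getBytes("UTF-8"))
    println(loadSecret("k8s://127.0.0.1", tmp.toString))
  }
}
```

In practice the same secret file would be mounted into both the driver and executor containers (for example from a Kubernetes secret volume), with `spark.authenticate.secret.file` or the driver/executor-specific variants introduced by this patch pointing at it.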
Authored-by: mcheah Signed-off-by: Marcelo Vanzin --- .../org/apache/spark/SecurityManager.scala | 34 +++++++++- .../scala/org/apache/spark/SparkEnv.scala | 4 +- .../spark/internal/config/package.scala | 31 +++++++++ .../apache/spark/SecurityManagerSuite.scala | 66 ++++++++++++++++++- docs/security.md | 44 +++++++++++++ .../features/BasicExecutorFeatureStep.scala | 16 +++-- .../BasicExecutorFeatureStepSuite.scala | 23 +++++++ 7 files changed, 205 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SecurityManager.scala b/core/src/main/scala/org/apache/spark/SecurityManager.scala index 96e4b53b2418..15783c952c23 100644 --- a/core/src/main/scala/org/apache/spark/SecurityManager.scala +++ b/core/src/main/scala/org/apache/spark/SecurityManager.scala @@ -17,8 +17,11 @@ package org.apache.spark +import java.io.File import java.net.{Authenticator, PasswordAuthentication} import java.nio.charset.StandardCharsets.UTF_8 +import java.nio.file.Files +import java.util.Base64 import org.apache.hadoop.io.Text import org.apache.hadoop.security.{Credentials, UserGroupInformation} @@ -43,7 +46,8 @@ import org.apache.spark.util.Utils */ private[spark] class SecurityManager( sparkConf: SparkConf, - val ioEncryptionKey: Option[Array[Byte]] = None) + val ioEncryptionKey: Option[Array[Byte]] = None, + authSecretFileConf: ConfigEntry[Option[String]] = AUTH_SECRET_FILE) extends Logging with SecretKeyHolder { import SecurityManager._ @@ -328,6 +332,7 @@ private[spark] class SecurityManager( .orElse(Option(secretKey)) .orElse(Option(sparkConf.getenv(ENV_AUTH_SECRET))) .orElse(sparkConf.getOption(SPARK_AUTH_SECRET_CONF)) + .orElse(secretKeyFromFile()) .getOrElse { throw new IllegalArgumentException( s"A secret key must be specified via the $SPARK_AUTH_SECRET_CONF config") @@ -348,7 +353,6 @@ private[spark] class SecurityManager( */ def initializeAuth(): Unit = { import SparkMasterRegex._ - val k8sRegex = "k8s.*".r if (!sparkConf.get(NETWORK_AUTH_ENABLED)) { return @@ -371,7 +375,14 @@ private[spark] class SecurityManager( return } - secretKey = Utils.createSecret(sparkConf) + if (sparkConf.get(AUTH_SECRET_FILE_DRIVER).isDefined != + sparkConf.get(AUTH_SECRET_FILE_EXECUTOR).isDefined) { + throw new IllegalArgumentException( + "Invalid secret configuration: Secret files must be specified for both the driver and the" + + " executors, not only one or the other.") + } + + secretKey = secretKeyFromFile().getOrElse(Utils.createSecret(sparkConf)) if (storeInUgi) { val creds = new Credentials() @@ -380,6 +391,22 @@ private[spark] class SecurityManager( } } + private def secretKeyFromFile(): Option[String] = { + sparkConf.get(authSecretFileConf).flatMap { secretFilePath => + sparkConf.getOption(SparkLauncher.SPARK_MASTER).map { + case k8sRegex() => + val secretFile = new File(secretFilePath) + require(secretFile.isFile, s"No file found containing the secret key at $secretFilePath.") + val base64Key = Base64.getEncoder.encodeToString(Files.readAllBytes(secretFile.toPath)) + require(!base64Key.isEmpty, s"Secret key from file located at $secretFilePath is empty.") + base64Key + case _ => + throw new IllegalArgumentException( + "Secret keys provided via files is only allowed in Kubernetes mode.") + } + } + } + // Default SecurityManager only has a single secret key, so ignore appId. 
override def getSaslUser(appId: String): String = getSaslUser() override def getSecretKey(appId: String): String = getSecretKey() @@ -387,6 +414,7 @@ private[spark] class SecurityManager( private[spark] object SecurityManager { + val k8sRegex = "k8s.*".r val SPARK_AUTH_CONF = NETWORK_AUTH_ENABLED.key val SPARK_AUTH_SECRET_CONF = "spark.authenticate.secret" // This is used to set auth secret to an executor's env variable. It should have the same diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index 66038eeaea54..de0c8579d9ac 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -232,8 +232,8 @@ object SparkEnv extends Logging { if (isDriver) { assert(listenerBus != null, "Attempted to create driver SparkEnv with null listener bus!") } - - val securityManager = new SecurityManager(conf, ioEncryptionKey) + val authSecretFileConf = if (isDriver) AUTH_SECRET_FILE_DRIVER else AUTH_SECRET_FILE_EXECUTOR + val securityManager = new SecurityManager(conf, ioEncryptionKey, authSecretFileConf) if (isDriver) { securityManager.initializeAuth() } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 85bb557abef5..f1c1c034df49 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -424,6 +424,37 @@ package object config { .booleanConf .createWithDefault(false) + private[spark] val AUTH_SECRET_FILE = + ConfigBuilder("spark.authenticate.secret.file") + .doc("Path to a file that contains the authentication secret to use. The secret key is " + + "loaded from this path on both the driver and the executors if overrides are not set for " + + "either entity (see below). File-based secret keys are only allowed when using " + + "Kubernetes.") + .stringConf + .createOptional + + private[spark] val AUTH_SECRET_FILE_DRIVER = + ConfigBuilder("spark.authenticate.secret.driver.file") + .doc("Path to a file that contains the authentication secret to use. Loaded by the " + + "driver. In Kubernetes client mode it is often useful to set a different secret " + + "path for the driver vs. the executors, since the driver may not be running in " + + "a pod unlike the executors. If this is set, an accompanying secret file must " + + "be specified for the executors. The fallback configuration allows the same path to be " + + "used for both the driver and the executors when running in cluster mode. File-based " + + "secret keys are only allowed when using Kubernetes.") + .fallbackConf(AUTH_SECRET_FILE) + + private[spark] val AUTH_SECRET_FILE_EXECUTOR = + ConfigBuilder("spark.authenticate.secret.executor.file") + .doc("Path to a file that contains the authentication secret to use. Loaded by the " + + "executors only. In Kubernetes client mode it is often useful to set a different " + + "secret path for the driver vs. the executors, since the driver may not be running " + + "in a pod unlike the executors. If this is set, an accompanying secret file must be " + + "specified for the executors. The fallback configuration allows the same path to be " + + "used for both the driver and the executors when running in cluster mode. 
File-based " + + "secret keys are only allowed when using Kubernetes.") + .fallbackConf(AUTH_SECRET_FILE) + private[spark] val NETWORK_ENCRYPTION_ENABLED = ConfigBuilder("spark.network.crypto.enabled") .booleanConf diff --git a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala index eec8004fc94f..e9061f4e7beb 100644 --- a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala @@ -19,7 +19,9 @@ package org.apache.spark import java.io.File import java.nio.charset.StandardCharsets.UTF_8 +import java.nio.file.Files import java.security.PrivilegedExceptionAction +import java.util.Base64 import org.apache.hadoop.security.UserGroupInformation @@ -395,9 +397,54 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties { assert(keyFromEnv === new SecurityManager(conf2).getSecretKey()) } + test("use executor-specific secret file configuration.") { + val secretFileFromDriver = createTempSecretFile("driver-secret") + val secretFileFromExecutor = createTempSecretFile("executor-secret") + val conf = new SparkConf() + .setMaster("k8s://127.0.0.1") + .set(AUTH_SECRET_FILE_DRIVER, Some(secretFileFromDriver.getAbsolutePath)) + .set(AUTH_SECRET_FILE_EXECUTOR, Some(secretFileFromExecutor.getAbsolutePath)) + .set(SecurityManager.SPARK_AUTH_CONF, "true") + val mgr = new SecurityManager(conf, authSecretFileConf = AUTH_SECRET_FILE_EXECUTOR) + assert(encodeFileAsBase64(secretFileFromExecutor) === mgr.getSecretKey()) + } + + test("secret file must be defined in both driver and executor") { + val conf1 = new SparkConf() + .set(AUTH_SECRET_FILE_DRIVER, Some("/tmp/driver-secret.txt")) + .set(SecurityManager.SPARK_AUTH_CONF, "true") + val mgr1 = new SecurityManager(conf1) + intercept[IllegalArgumentException] { + mgr1.initializeAuth() + } + + val conf2 = new SparkConf() + .set(AUTH_SECRET_FILE_EXECUTOR, Some("/tmp/executor-secret.txt")) + .set(SecurityManager.SPARK_AUTH_CONF, "true") + val mgr2 = new SecurityManager(conf2) + intercept[IllegalArgumentException] { + mgr2.initializeAuth() + } + } + + Seq("yarn", "local", "local[*]", "local[1,2]", "mesos://localhost:8080").foreach { master => + test(s"master $master cannot use file mounted secrets") { + val conf = new SparkConf() + .set(AUTH_SECRET_FILE, "/tmp/secret.txt") + .set(SecurityManager.SPARK_AUTH_CONF, "true") + .setMaster(master) + intercept[IllegalArgumentException] { + new SecurityManager(conf).getSecretKey() + } + intercept[IllegalArgumentException] { + new SecurityManager(conf).initializeAuth() + } + } + } + // How is the secret expected to be generated and stored. 
object SecretTestType extends Enumeration { - val MANUAL, AUTO, UGI = Value + val MANUAL, AUTO, UGI, FILE = Value } import SecretTestType._ @@ -408,6 +455,7 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties { ("local[*]", UGI), ("local[1, 2]", UGI), ("k8s://127.0.0.1", AUTO), + ("k8s://127.0.1.1", FILE), ("local-cluster[2, 1, 1024]", MANUAL), ("invalid", MANUAL) ).foreach { case (master, secretType) => @@ -440,6 +488,12 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties { intercept[IllegalArgumentException] { mgr.getSecretKey() } + + case FILE => + val secretFile = createTempSecretFile() + conf.set(AUTH_SECRET_FILE, secretFile.getAbsolutePath) + mgr.initializeAuth() + assert(encodeFileAsBase64(secretFile) === mgr.getSecretKey()) } } } @@ -447,5 +501,15 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties { } } + private def encodeFileAsBase64(secretFile: File) = { + Base64.getEncoder.encodeToString(Files.readAllBytes(secretFile.toPath)) + } + + private def createTempSecretFile(contents: String = "test-secret"): File = { + val secretDir = Utils.createTempDir("temp-secrets") + val secretFile = new File(secretDir, "temp-secret.txt") + Files.write(secretFile.toPath, contents.getBytes(UTF_8)) + secretFile + } } diff --git a/docs/security.md b/docs/security.md index 2a4f3c074c1e..8416ed91356a 100644 --- a/docs/security.md +++ b/docs/security.md @@ -66,6 +66,50 @@ Kubernetes admin to ensure that Spark authentication is secure.
+Alternatively, one can mount authentication secrets using files and Kubernetes secrets that
+the user mounts into their pods.
+
+<table class="table">
+<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+<tr>
+  <td><code>spark.authenticate.secret.file</code></td>
+  <td>None</td>
+  <td>
+    Path pointing to the secret key to use for securing connections. Ensure that the
+    contents of the file have been securely generated. This file is loaded on both the driver
+    and the executors unless other settings override this (see below).
+  </td>
+</tr>
+<tr>
+  <td><code>spark.authenticate.secret.driver.file</code></td>
+  <td>The value of <code>spark.authenticate.secret.file</code></td>
+  <td>
+    When specified, overrides the location that the Spark driver reads to load the secret.
+    Useful when in client mode, when the location of the secret file may differ in the pod versus
+    the node the driver is running in. When this is specified,
+    <code>spark.authenticate.secret.executor.file</code> must be specified so that the driver
+    and the executors can both use files to load the secret key. Ensure that the contents of the file
+    on the driver is identical to the contents of the file on the executors.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.authenticate.secret.executor.file</code></td>
+  <td>The value of <code>spark.authenticate.secret.file</code></td>
+  <td>
+    When specified, overrides the location that the Spark executors read to load the secret.
+    Useful in client mode, when the location of the secret file may differ in the pod versus
+    the node the driver is running in. When this is specified,
+    <code>spark.authenticate.secret.driver.file</code> must be specified so that the driver
+    and the executors can both use files to load the secret key. Ensure that the contents of the file
+    on the driver is identical to the contents of the file on the executors.
+  </td>
+</tr>
+</table>
+ +Note that when using files, Spark will not mount these files into the containers for you. It is up +you to ensure that the secret files are deployed securely into your containers and that the driver's +secret file agrees with the executors' secret file. + ## Encryption Spark supports AES-based encryption for RPC connections. For encryption to be enabled, RPC diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 939aa88b0797..4bcf4c9446aa 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -24,7 +24,7 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.internal.config.{EXECUTOR_CLASS_PATH, EXECUTOR_JAVA_OPTIONS, EXECUTOR_MEMORY, EXECUTOR_MEMORY_OVERHEAD, PYSPARK_EXECUTOR_MEMORY} +import org.apache.spark.internal.config._ import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils @@ -112,12 +112,14 @@ private[spark] class BasicExecutorFeatureStep( .build()) .build()) } ++ { - Option(secMgr.getSecretKey()).map { authSecret => - new EnvVarBuilder() - .withName(SecurityManager.ENV_AUTH_SECRET) - .withValue(authSecret) - .build() - } + if (kubernetesConf.get(AUTH_SECRET_FILE_EXECUTOR).isEmpty) { + Option(secMgr.getSecretKey()).map { authSecret => + new EnvVarBuilder() + .withName(SecurityManager.ENV_AUTH_SECRET) + .withValue(authSecret) + .build() + } + } else None } ++ { kubernetesConf.get(EXECUTOR_CLASS_PATH).map { cp => new EnvVarBuilder() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index 6aa862643c78..05989d9be7ad 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -16,6 +16,10 @@ */ package org.apache.spark.deploy.k8s.features +import java.io.File +import java.nio.charset.StandardCharsets +import java.nio.file.Files + import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model._ @@ -158,6 +162,25 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { checkEnv(executor, conf, Map(SecurityManager.ENV_AUTH_SECRET -> secMgr.getSecretKey())) } + test("Auth secret shouldn't propagate if files are loaded.") { + val secretDir = Utils.createTempDir("temp-secret") + val secretFile = new File(secretDir, "secret-file.txt") + Files.write(secretFile.toPath, "some-secret".getBytes(StandardCharsets.UTF_8)) + val conf = baseConf.clone() + .set(NETWORK_AUTH_ENABLED, true) + .set(AUTH_SECRET_FILE, secretFile.getAbsolutePath) + .set("spark.master", "k8s://127.0.0.1") + val secMgr = new SecurityManager(conf) + secMgr.initializeAuth() + + val step = new 
BasicExecutorFeatureStep(KubernetesTestConf.createExecutorConf(sparkConf = conf), + secMgr) + + val executor = step.configurePod(SparkPod.initialPod()) + assert(!KubernetesFeaturesTestUtils.containerHasEnvVar( + executor.container, SecurityManager.ENV_AUTH_SECRET)) + } + // There is always exactly one controller reference, and it points to the driver pod. private def checkOwnerReferences(executor: Pod, driverPodUid: String): Unit = { assert(executor.getMetadata.getOwnerReferences.size() === 1) From bd8da3799dd160771ebb3ea55b7678b644248425 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Wed, 12 Dec 2018 10:03:50 +0800 Subject: [PATCH 0116/1072] [SPARK-26193][SQL][FOLLOW UP] Read metrics rename and display text changes ## What changes were proposed in this pull request? Follow up pr for #23207, include following changes: - Rename `SQLShuffleMetricsReporter` to `SQLShuffleReadMetricsReporter` to make it match with write side naming. - Display text changes for read side for naming consistent. - Rename function in `ShuffleWriteProcessor`. - Delete `private[spark]` in execution package. ## How was this patch tested? Existing tests. Closes #23286 from xuanyuanking/SPARK-26193-follow. Authored-by: Yuanjian Li Signed-off-by: Wenchen Fan --- .../spark/scheduler/ShuffleMapTask.scala | 2 +- .../spark/shuffle/ShuffleWriteProcessor.scala | 2 +- .../spark/sql/execution/ShuffledRowRDD.scala | 6 ++-- .../exchange/ShuffleExchangeExec.scala | 4 +-- .../apache/spark/sql/execution/limit.scala | 6 ++-- .../metric/SQLShuffleMetricsReporter.scala | 36 +++++++++---------- .../execution/UnsafeRowSerializerSuite.scala | 4 +-- .../execution/metric/SQLMetricsSuite.scala | 20 +++++------ 8 files changed, 40 insertions(+), 40 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala index 2a8d1dd995e2..35664ff515d4 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala @@ -92,7 +92,7 @@ private[spark] class ShuffleMapTask( threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime } else 0L - dep.shuffleWriterProcessor.writeProcess(rdd, dep, partitionId, context, partition) + dep.shuffleWriterProcessor.write(rdd, dep, partitionId, context, partition) } override def preferredLocations: Seq[TaskLocation] = preferredLocs diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala index f5213157a9a8..5b0c7e9f2b0b 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala @@ -41,7 +41,7 @@ private[spark] class ShuffleWriteProcessor extends Serializable with Logging { * get from [[ShuffleManager]] and triggers rdd compute, finally return the [[MapStatus]] for * this task. 
*/ - def writeProcess( + def write( rdd: RDD[_], dep: ShuffleDependency[_, _, _], partitionId: Int, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala index 9b05faaed045..079ff25fcb67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala @@ -22,7 +22,7 @@ import java.util.Arrays import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.metric.{SQLMetric, SQLShuffleMetricsReporter} +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLShuffleReadMetricsReporter} /** * The [[Partition]] used by [[ShuffledRowRDD]]. A post-shuffle partition @@ -157,9 +157,9 @@ class ShuffledRowRDD( override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { val shuffledRowPartition = split.asInstanceOf[ShuffledRowRDDPartition] val tempMetrics = context.taskMetrics().createTempShuffleReadMetrics() - // `SQLShuffleMetricsReporter` will update its own metrics for SQL exchange operator, + // `SQLShuffleReadMetricsReporter` will update its own metrics for SQL exchange operator, // as well as the `tempMetrics` for basic shuffle metrics. - val sqlMetricsReporter = new SQLShuffleMetricsReporter(tempMetrics, metrics) + val sqlMetricsReporter = new SQLShuffleReadMetricsReporter(tempMetrics, metrics) // The range of pre-shuffle partitions that we are fetching at here is // [startPreShufflePartitionIndex, endPreShufflePartitionIndex - 1]. val reader = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index 0c2020572e72..da7b0c6f43fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, BoundReference, Uns import org.apache.spark.sql.catalyst.expressions.codegen.LazilyGeneratedOrdering import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics, SQLShuffleMetricsReporter, SQLShuffleWriteMetricsReporter} +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics, SQLShuffleReadMetricsReporter, SQLShuffleWriteMetricsReporter} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.util.MutablePair @@ -50,7 +50,7 @@ case class ShuffleExchangeExec( private lazy val writeMetrics = SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) private lazy val readMetrics = - SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) + SQLShuffleReadMetricsReporter.createShuffleReadMetrics(sparkContext) override lazy val metrics = Map( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size") ) ++ readMetrics ++ writeMetrics diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index 1f2fdde53864..bfaf080292bc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGe import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec -import org.apache.spark.sql.execution.metric.{SQLShuffleMetricsReporter, SQLShuffleWriteMetricsReporter} +import org.apache.spark.sql.execution.metric.{SQLShuffleReadMetricsReporter, SQLShuffleWriteMetricsReporter} /** * Take the first `limit` elements and collect them to a single partition. @@ -41,7 +41,7 @@ case class CollectLimitExec(limit: Int, child: SparkPlan) extends UnaryExecNode private lazy val writeMetrics = SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) private lazy val readMetrics = - SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) + SQLShuffleReadMetricsReporter.createShuffleReadMetrics(sparkContext) override lazy val metrics = readMetrics ++ writeMetrics protected override def doExecute(): RDD[InternalRow] = { val locallyLimited = child.execute().mapPartitionsInternal(_.take(limit)) @@ -165,7 +165,7 @@ case class TakeOrderedAndProjectExec( private lazy val writeMetrics = SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) private lazy val readMetrics = - SQLShuffleMetricsReporter.createShuffleReadMetrics(sparkContext) + SQLShuffleReadMetricsReporter.createShuffleReadMetrics(sparkContext) override lazy val metrics = readMetrics ++ writeMetrics protected override def doExecute(): RDD[InternalRow] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala index ff7941e3b3e8..2c0ea80495ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLShuffleMetricsReporter.scala @@ -27,23 +27,23 @@ import org.apache.spark.shuffle.ShuffleWriteMetricsReporter * @param metrics All metrics in current SparkPlan. This param should not empty and * contains all shuffle metrics defined in createShuffleReadMetrics. 
*/ -private[spark] class SQLShuffleMetricsReporter( +class SQLShuffleReadMetricsReporter( tempMetrics: TempShuffleReadMetrics, metrics: Map[String, SQLMetric]) extends TempShuffleReadMetrics { private[this] val _remoteBlocksFetched = - metrics(SQLShuffleMetricsReporter.REMOTE_BLOCKS_FETCHED) + metrics(SQLShuffleReadMetricsReporter.REMOTE_BLOCKS_FETCHED) private[this] val _localBlocksFetched = - metrics(SQLShuffleMetricsReporter.LOCAL_BLOCKS_FETCHED) + metrics(SQLShuffleReadMetricsReporter.LOCAL_BLOCKS_FETCHED) private[this] val _remoteBytesRead = - metrics(SQLShuffleMetricsReporter.REMOTE_BYTES_READ) + metrics(SQLShuffleReadMetricsReporter.REMOTE_BYTES_READ) private[this] val _remoteBytesReadToDisk = - metrics(SQLShuffleMetricsReporter.REMOTE_BYTES_READ_TO_DISK) + metrics(SQLShuffleReadMetricsReporter.REMOTE_BYTES_READ_TO_DISK) private[this] val _localBytesRead = - metrics(SQLShuffleMetricsReporter.LOCAL_BYTES_READ) + metrics(SQLShuffleReadMetricsReporter.LOCAL_BYTES_READ) private[this] val _fetchWaitTime = - metrics(SQLShuffleMetricsReporter.FETCH_WAIT_TIME) + metrics(SQLShuffleReadMetricsReporter.FETCH_WAIT_TIME) private[this] val _recordsRead = - metrics(SQLShuffleMetricsReporter.RECORDS_READ) + metrics(SQLShuffleReadMetricsReporter.RECORDS_READ) override def incRemoteBlocksFetched(v: Long): Unit = { _remoteBlocksFetched.add(v) @@ -75,7 +75,7 @@ private[spark] class SQLShuffleMetricsReporter( } } -private[spark] object SQLShuffleMetricsReporter { +object SQLShuffleReadMetricsReporter { val REMOTE_BLOCKS_FETCHED = "remoteBlocksFetched" val LOCAL_BLOCKS_FETCHED = "localBlocksFetched" val REMOTE_BYTES_READ = "remoteBytesRead" @@ -88,8 +88,8 @@ private[spark] object SQLShuffleMetricsReporter { * Create all shuffle read relative metrics and return the Map. */ def createShuffleReadMetrics(sc: SparkContext): Map[String, SQLMetric] = Map( - REMOTE_BLOCKS_FETCHED -> SQLMetrics.createMetric(sc, "remote blocks fetched"), - LOCAL_BLOCKS_FETCHED -> SQLMetrics.createMetric(sc, "local blocks fetched"), + REMOTE_BLOCKS_FETCHED -> SQLMetrics.createMetric(sc, "remote blocks read"), + LOCAL_BLOCKS_FETCHED -> SQLMetrics.createMetric(sc, "local blocks read"), REMOTE_BYTES_READ -> SQLMetrics.createSizeMetric(sc, "remote bytes read"), REMOTE_BYTES_READ_TO_DISK -> SQLMetrics.createSizeMetric(sc, "remote bytes read to disk"), LOCAL_BYTES_READ -> SQLMetrics.createSizeMetric(sc, "local bytes read"), @@ -102,7 +102,7 @@ private[spark] object SQLShuffleMetricsReporter { * @param metricsReporter Other reporter need to be updated in this SQLShuffleWriteMetricsReporter. * @param metrics Shuffle write metrics in current SparkPlan. 
*/ -private[spark] class SQLShuffleWriteMetricsReporter( +class SQLShuffleWriteMetricsReporter( metricsReporter: ShuffleWriteMetricsReporter, metrics: Map[String, SQLMetric]) extends ShuffleWriteMetricsReporter { private[this] val _bytesWritten = @@ -112,29 +112,29 @@ private[spark] class SQLShuffleWriteMetricsReporter( private[this] val _writeTime = metrics(SQLShuffleWriteMetricsReporter.SHUFFLE_WRITE_TIME) - override private[spark] def incBytesWritten(v: Long): Unit = { + override def incBytesWritten(v: Long): Unit = { metricsReporter.incBytesWritten(v) _bytesWritten.add(v) } - override private[spark] def decRecordsWritten(v: Long): Unit = { + override def decRecordsWritten(v: Long): Unit = { metricsReporter.decBytesWritten(v) _recordsWritten.set(_recordsWritten.value - v) } - override private[spark] def incRecordsWritten(v: Long): Unit = { + override def incRecordsWritten(v: Long): Unit = { metricsReporter.incRecordsWritten(v) _recordsWritten.add(v) } - override private[spark] def incWriteTime(v: Long): Unit = { + override def incWriteTime(v: Long): Unit = { metricsReporter.incWriteTime(v) _writeTime.add(v) } - override private[spark] def decBytesWritten(v: Long): Unit = { + override def decBytesWritten(v: Long): Unit = { metricsReporter.decBytesWritten(v) _bytesWritten.set(_bytesWritten.value - v) } } -private[spark] object SQLShuffleWriteMetricsReporter { +object SQLShuffleWriteMetricsReporter { val SHUFFLE_BYTES_WRITTEN = "shuffleBytesWritten" val SHUFFLE_RECORDS_WRITTEN = "shuffleRecordsWritten" val SHUFFLE_WRITE_TIME = "shuffleWriteTime" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala index 1ad5713ab8ae..ca8692290edb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{LocalSparkSession, Row, SparkSession} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow} -import org.apache.spark.sql.execution.metric.SQLShuffleMetricsReporter +import org.apache.spark.sql.execution.metric.SQLShuffleReadMetricsReporter import org.apache.spark.sql.types._ import org.apache.spark.storage.ShuffleBlockId import org.apache.spark.util.collection.ExternalSorter @@ -140,7 +140,7 @@ class UnsafeRowSerializerSuite extends SparkFunSuite with LocalSparkSession { new UnsafeRowSerializer(2)) val shuffled = new ShuffledRowRDD( dependency, - SQLShuffleMetricsReporter.createShuffleReadMetrics(spark.sparkContext)) + SQLShuffleReadMetricsReporter.createShuffleReadMetrics(spark.sparkContext)) shuffled.count() } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index f6495496a58e..47265df4831d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -96,8 +96,8 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared "avg hash probe (min, med, max)" -> "\n(1, 1, 1)")) val shuffleExpected1 = Map( "records read" -> 2L, - "local blocks fetched" -> 2L, - "remote blocks fetched" -> 0L, 
+ "local blocks read" -> 2L, + "remote blocks read" -> 0L, "shuffle records written" -> 2L) testSparkPlanMetrics(df, 1, Map( 2L -> (("HashAggregate", expected1(0))), @@ -114,8 +114,8 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared "avg hash probe (min, med, max)" -> "\n(1, 1, 1)")) val shuffleExpected2 = Map( "records read" -> 4L, - "local blocks fetched" -> 4L, - "remote blocks fetched" -> 0L, + "local blocks read" -> 4L, + "remote blocks read" -> 0L, "shuffle records written" -> 4L) testSparkPlanMetrics(df2, 1, Map( 2L -> (("HashAggregate", expected2(0))), @@ -175,8 +175,8 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared 1L -> (("Exchange", Map( "shuffle records written" -> 2L, "records read" -> 2L, - "local blocks fetched" -> 2L, - "remote blocks fetched" -> 0L))), + "local blocks read" -> 2L, + "remote blocks read" -> 0L))), 0L -> (("ObjectHashAggregate", Map("number of output rows" -> 1L)))) ) @@ -187,8 +187,8 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared 1L -> (("Exchange", Map( "shuffle records written" -> 4L, "records read" -> 4L, - "local blocks fetched" -> 4L, - "remote blocks fetched" -> 0L))), + "local blocks read" -> 4L, + "remote blocks read" -> 0L))), 0L -> (("ObjectHashAggregate", Map("number of output rows" -> 3L)))) ) } @@ -216,8 +216,8 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared "number of output rows" -> 4L))), 2L -> (("Exchange", Map( "records read" -> 4L, - "local blocks fetched" -> 2L, - "remote blocks fetched" -> 0L, + "local blocks read" -> 2L, + "remote blocks read" -> 0L, "shuffle records written" -> 2L)))) ) } From 79e36e2c2ac01458b5baa3f3ee310fddd29e9c35 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Wed, 12 Dec 2018 09:03:13 -0600 Subject: [PATCH 0117/1072] [SPARK-19827][R][FOLLOWUP] spark.ml R API for PIC ## What changes were proposed in this pull request? Follow up style fixes to PIC in R; see #23072 ## How was this patch tested? Existing tests. Closes #23292 from srowen/SPARK-19827.2. Authored-by: Sean Owen Signed-off-by: Sean Owen --- R/pkg/R/mllib_clustering.R | 15 ++++++--------- R/pkg/R/mllib_fpm.R | 4 ++-- examples/src/main/r/ml/powerIterationClustering.R | 3 ++- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R index 7d9dcebfe70d..9b32b71d34fe 100644 --- a/R/pkg/R/mllib_clustering.R +++ b/R/pkg/R/mllib_clustering.R @@ -621,11 +621,10 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"), #' #' A scalable graph clustering algorithm. Users can call \code{spark.assignClusters} to #' return a cluster assignment for each input vertex. -#' -# Run the PIC algorithm and returns a cluster assignment for each input vertex. +#' Run the PIC algorithm and returns a cluster assignment for each input vertex. #' @param data a SparkDataFrame. #' @param k the number of clusters to create. -#' @param initMode the initialization algorithm. +#' @param initMode the initialization algorithm; "random" or "degree" #' @param maxIter the maximum number of iterations. #' @param sourceCol the name of the input column for source vertex IDs. #' @param destinationCol the name of the input column for destination vertex IDs @@ -633,18 +632,16 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"), #' we treat all instance weights as 1.0. #' @param ... additional argument(s) passed to the method. 
#' @return A dataset that contains columns of vertex id and the corresponding cluster for the id. -#' The schema of it will be: -#' \code{id: Long} -#' \code{cluster: Int} +#' The schema of it will be: \code{id: integer}, \code{cluster: integer} #' @rdname spark.powerIterationClustering -#' @aliases assignClusters,PowerIterationClustering-method,SparkDataFrame-method +#' @aliases spark.assignClusters,SparkDataFrame-method #' @examples #' \dontrun{ #' df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0), #' list(1L, 2L, 1.0), list(3L, 4L, 1.0), #' list(4L, 0L, 0.1)), #' schema = c("src", "dst", "weight")) -#' clusters <- spark.assignClusters(df, initMode="degree", weightCol="weight") +#' clusters <- spark.assignClusters(df, initMode = "degree", weightCol = "weight") #' showDF(clusters) #' } #' @note spark.assignClusters(SparkDataFrame) since 3.0.0 @@ -652,7 +649,7 @@ setMethod("spark.assignClusters", signature(data = "SparkDataFrame"), function(data, k = 2L, initMode = c("random", "degree"), maxIter = 20L, sourceCol = "src", destinationCol = "dst", weightCol = NULL) { - if (!is.numeric(k) || k < 1) { + if (!is.integer(k) || k < 1) { stop("k should be a number with value >= 1.") } if (!is.integer(maxIter) || maxIter <= 0) { diff --git a/R/pkg/R/mllib_fpm.R b/R/pkg/R/mllib_fpm.R index c248e9ec9be9..0cc7a16c302d 100644 --- a/R/pkg/R/mllib_fpm.R +++ b/R/pkg/R/mllib_fpm.R @@ -183,8 +183,8 @@ setMethod("write.ml", signature(object = "FPGrowthModel", path = "character"), #' @return A complete set of frequent sequential patterns in the input sequences of itemsets. #' The returned \code{SparkDataFrame} contains columns of sequence and corresponding #' frequency. The schema of it will be: -#' \code{sequence: ArrayType(ArrayType(T))} (T is the item type) -#' \code{freq: Long} +#' \code{sequence: ArrayType(ArrayType(T))}, \code{freq: integer} +#' where T is the item type #' @rdname spark.prefixSpan #' @aliases findFrequentSequentialPatterns,PrefixSpan,SparkDataFrame-method #' @examples diff --git a/examples/src/main/r/ml/powerIterationClustering.R b/examples/src/main/r/ml/powerIterationClustering.R index ba43037106d1..3530d88e5050 100644 --- a/examples/src/main/r/ml/powerIterationClustering.R +++ b/examples/src/main/r/ml/powerIterationClustering.R @@ -30,7 +30,8 @@ df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0), list(4L, 0L, 0.1)), schema = c("src", "dst", "weight")) # assign clusters -clusters <- spark.assignClusters(df, k=2L, maxIter=20L, initMode="degree", weightCol="weight") +clusters <- spark.assignClusters(df, k = 2L, maxIter = 20L, + initMode = "degree", weightCol = "weight") showDF(arrange(clusters, clusters$id)) # $example off$ From 570b8f3d45ad8d6649ed633251a8194d910f1ab5 Mon Sep 17 00:00:00 2001 From: Ilya Matiach Date: Wed, 12 Dec 2018 10:06:41 -0600 Subject: [PATCH 0118/1072] [SPARK-24102][ML][MLLIB] ML Evaluators should use weight column - added weight column for regression evaluator ## What changes were proposed in this pull request? The evaluators BinaryClassificationEvaluator, RegressionEvaluator, and MulticlassClassificationEvaluator and the corresponding metrics classes BinaryClassificationMetrics, RegressionMetrics and MulticlassMetrics should use sample weight data. I've closed the PR: https://github.com/apache/spark/pull/16557 as recommended in favor of creating three pull requests, one for each of the evaluators (binary/regression/multiclass) to make it easier to review/update. 
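(Editor's illustration.) A small end-to-end usage sketch of the evaluator change described above. The DataFrame contents and column names are invented for the example; `setWeightCol` is the setter this patch adds to `RegressionEvaluator`:

```scala
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.sql.SparkSession

// Evaluate RMSE over (prediction, label) pairs with per-row sample weights.
object WeightedRegressionEval {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("weighted-eval").getOrCreate()
    import spark.implicits._

    // (prediction, label, weight) rows; the weights down-weight the last, noisier point.
    val df = Seq(
      (2.25, 3.0, 0.10),
      (-0.25, -0.5, 0.20),
      (1.75, 2.0, 0.15),
      (7.75, 7.0, 0.05)
    ).toDF("prediction", "label", "weight")

    val evaluator = new RegressionEvaluator()
      .setPredictionCol("prediction")
      .setLabelCol("label")
      .setWeightCol("weight")   // new in this change; omit it to fall back to weight 1.0 per row
      .setMetricName("rmse")

    println(s"weighted rmse = ${evaluator.evaluate(df)}")
    spark.stop()
  }
}
```

With every weight set to 1.0 the result matches the unweighted metric, which is what the "regression metrics with same (1.0) weight samples" test further down verifies.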
The updates to the regression metrics were based on (and updated with new changes based on comments): https://issues.apache.org/jira/browse/SPARK-11520 ("RegressionMetrics should support instance weights") but the pull request was closed as the changes were never checked in. ## How was this patch tested? I added tests to the metrics class. Closes #17085 from imatiach-msft/ilmat/regression-evaluate. Authored-by: Ilya Matiach Signed-off-by: Sean Owen --- .../ml/evaluation/RegressionEvaluator.scala | 19 ++++--- .../mllib/evaluation/RegressionMetrics.scala | 30 ++++++----- .../stat/MultivariateOnlineSummarizer.scala | 25 ++++++---- .../stat/MultivariateStatisticalSummary.scala | 6 +++ .../evaluation/RegressionMetricsSuite.scala | 50 +++++++++++++++++++ project/MimaExcludes.scala | 5 +- 6 files changed, 106 insertions(+), 29 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala index 031cd0d635bf..616569bb55e4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.evaluation import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.param.{Param, ParamMap, ParamValidators} -import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol} +import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol, HasWeightCol} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils} import org.apache.spark.mllib.evaluation.RegressionMetrics import org.apache.spark.sql.{Dataset, Row} @@ -33,7 +33,8 @@ import org.apache.spark.sql.types.{DoubleType, FloatType} @Since("1.4.0") @Experimental final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val uid: String) - extends Evaluator with HasPredictionCol with HasLabelCol with DefaultParamsWritable { + extends Evaluator with HasPredictionCol with HasLabelCol + with HasWeightCol with DefaultParamsWritable { @Since("1.4.0") def this() = this(Identifiable.randomUID("regEval")) @@ -69,6 +70,10 @@ final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val ui @Since("1.4.0") def setLabelCol(value: String): this.type = set(labelCol, value) + /** @group setParam */ + @Since("3.0.0") + def setWeightCol(value: String): this.type = set(weightCol, value) + setDefault(metricName -> "rmse") @Since("2.0.0") @@ -77,11 +82,13 @@ final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val ui SchemaUtils.checkColumnTypes(schema, $(predictionCol), Seq(DoubleType, FloatType)) SchemaUtils.checkNumericType(schema, $(labelCol)) - val predictionAndLabels = dataset - .select(col($(predictionCol)).cast(DoubleType), col($(labelCol)).cast(DoubleType)) + val predictionAndLabelsWithWeights = dataset + .select(col($(predictionCol)).cast(DoubleType), col($(labelCol)).cast(DoubleType), + if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol))) .rdd - .map { case Row(prediction: Double, label: Double) => (prediction, label) } - val metrics = new RegressionMetrics(predictionAndLabels) + .map { case Row(prediction: Double, label: Double, weight: Double) => + (prediction, label, weight) } + val metrics = new RegressionMetrics(predictionAndLabelsWithWeights) val metric = $(metricName) match { case "rmse" => metrics.rootMeanSquaredError case 
"mse" => metrics.meanSquaredError diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala index 020676cac5a6..525047973ad5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala @@ -27,17 +27,18 @@ import org.apache.spark.sql.DataFrame /** * Evaluator for regression. * - * @param predictionAndObservations an RDD of (prediction, observation) pairs + * @param predAndObsWithOptWeight an RDD of either (prediction, observation, weight) + * or (prediction, observation) pairs * @param throughOrigin True if the regression is through the origin. For example, in linear * regression, it will be true without fitting intercept. */ @Since("1.2.0") class RegressionMetrics @Since("2.0.0") ( - predictionAndObservations: RDD[(Double, Double)], throughOrigin: Boolean) + predAndObsWithOptWeight: RDD[_ <: Product], throughOrigin: Boolean) extends Logging { @Since("1.2.0") - def this(predictionAndObservations: RDD[(Double, Double)]) = + def this(predictionAndObservations: RDD[_ <: Product]) = this(predictionAndObservations, false) /** @@ -52,10 +53,13 @@ class RegressionMetrics @Since("2.0.0") ( * Use MultivariateOnlineSummarizer to calculate summary statistics of observations and errors. */ private lazy val summary: MultivariateStatisticalSummary = { - val summary: MultivariateStatisticalSummary = predictionAndObservations.map { - case (prediction, observation) => Vectors.dense(observation, observation - prediction) + val summary: MultivariateStatisticalSummary = predAndObsWithOptWeight.map { + case (prediction: Double, observation: Double, weight: Double) => + (Vectors.dense(observation, observation - prediction), weight) + case (prediction: Double, observation: Double) => + (Vectors.dense(observation, observation - prediction), 1.0) }.treeAggregate(new MultivariateOnlineSummarizer())( - (summary, v) => summary.add(v), + (summary, sample) => summary.add(sample._1, sample._2), (sum1, sum2) => sum1.merge(sum2) ) summary @@ -63,11 +67,13 @@ class RegressionMetrics @Since("2.0.0") ( private lazy val SSy = math.pow(summary.normL2(0), 2) private lazy val SSerr = math.pow(summary.normL2(1), 2) - private lazy val SStot = summary.variance(0) * (summary.count - 1) + private lazy val SStot = summary.variance(0) * (summary.weightSum - 1) private lazy val SSreg = { val yMean = summary.mean(0) - predictionAndObservations.map { - case (prediction, _) => math.pow(prediction - yMean, 2) + predAndObsWithOptWeight.map { + case (prediction: Double, _: Double, weight: Double) => + math.pow(prediction - yMean, 2) * weight + case (prediction: Double, _: Double) => math.pow(prediction - yMean, 2) }.sum() } @@ -79,7 +85,7 @@ class RegressionMetrics @Since("2.0.0") ( */ @Since("1.2.0") def explainedVariance: Double = { - SSreg / summary.count + SSreg / summary.weightSum } /** @@ -88,7 +94,7 @@ class RegressionMetrics @Since("2.0.0") ( */ @Since("1.2.0") def meanAbsoluteError: Double = { - summary.normL1(1) / summary.count + summary.normL1(1) / summary.weightSum } /** @@ -97,7 +103,7 @@ class RegressionMetrics @Since("2.0.0") ( */ @Since("1.2.0") def meanSquaredError: Double = { - SSerr / summary.count + SSerr / summary.weightSum } /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala index 0554b6d8ff5b..6d510e1633d6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala @@ -52,7 +52,7 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S private var totalCnt: Long = 0 private var totalWeightSum: Double = 0.0 private var weightSquareSum: Double = 0.0 - private var weightSum: Array[Double] = _ + private var currWeightSum: Array[Double] = _ private var nnz: Array[Long] = _ private var currMax: Array[Double] = _ private var currMin: Array[Double] = _ @@ -78,7 +78,7 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S currM2n = Array.ofDim[Double](n) currM2 = Array.ofDim[Double](n) currL1 = Array.ofDim[Double](n) - weightSum = Array.ofDim[Double](n) + currWeightSum = Array.ofDim[Double](n) nnz = Array.ofDim[Long](n) currMax = Array.fill[Double](n)(Double.MinValue) currMin = Array.fill[Double](n)(Double.MaxValue) @@ -91,7 +91,7 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S val localCurrM2n = currM2n val localCurrM2 = currM2 val localCurrL1 = currL1 - val localWeightSum = weightSum + val localWeightSum = currWeightSum val localNumNonzeros = nnz val localCurrMax = currMax val localCurrMin = currMin @@ -139,8 +139,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S weightSquareSum += other.weightSquareSum var i = 0 while (i < n) { - val thisNnz = weightSum(i) - val otherNnz = other.weightSum(i) + val thisNnz = currWeightSum(i) + val otherNnz = other.currWeightSum(i) val totalNnz = thisNnz + otherNnz val totalCnnz = nnz(i) + other.nnz(i) if (totalNnz != 0.0) { @@ -157,7 +157,7 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S currMax(i) = math.max(currMax(i), other.currMax(i)) currMin(i) = math.min(currMin(i), other.currMin(i)) } - weightSum(i) = totalNnz + currWeightSum(i) = totalNnz nnz(i) = totalCnnz i += 1 } @@ -170,7 +170,7 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S this.totalCnt = other.totalCnt this.totalWeightSum = other.totalWeightSum this.weightSquareSum = other.weightSquareSum - this.weightSum = other.weightSum.clone() + this.currWeightSum = other.currWeightSum.clone() this.nnz = other.nnz.clone() this.currMax = other.currMax.clone() this.currMin = other.currMin.clone() @@ -189,7 +189,7 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S val realMean = Array.ofDim[Double](n) var i = 0 while (i < n) { - realMean(i) = currMean(i) * (weightSum(i) / totalWeightSum) + realMean(i) = currMean(i) * (currWeightSum(i) / totalWeightSum) i += 1 } Vectors.dense(realMean) @@ -214,8 +214,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S val len = currM2n.length while (i < len) { // We prevent variance from negative value caused by numerical error. 
- realVariance(i) = math.max((currM2n(i) + deltaMean(i) * deltaMean(i) * weightSum(i) * - (totalWeightSum - weightSum(i)) / totalWeightSum) / denominator, 0.0) + realVariance(i) = math.max((currM2n(i) + deltaMean(i) * deltaMean(i) * currWeightSum(i) * + (totalWeightSum - currWeightSum(i)) / totalWeightSum) / denominator, 0.0) i += 1 } } @@ -229,6 +229,11 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S @Since("1.1.0") override def count: Long = totalCnt + /** + * Sum of weights. + */ + override def weightSum: Double = totalWeightSum + /** * Number of nonzero elements in each dimension. * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala index 39a16fb743d6..a4381032f8c0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala @@ -44,6 +44,12 @@ trait MultivariateStatisticalSummary { @Since("1.0.0") def count: Long + /** + * Sum of weights. + */ + @Since("3.0.0") + def weightSum: Double + /** * Number of nonzero elements (including explicitly presented zero values) in each column. */ diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala index f1d517383643..23809777f7d3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala @@ -133,4 +133,54 @@ class RegressionMetricsSuite extends SparkFunSuite with MLlibTestSparkContext { "root mean squared error mismatch") assert(metrics.r2 ~== 1.0 absTol eps, "r2 score mismatch") } + + test("regression metrics with same (1.0) weight samples") { + val predictionAndObservationWithWeight = sc.parallelize( + Seq((2.25, 3.0, 1.0), (-0.25, -0.5, 1.0), (1.75, 2.0, 1.0), (7.75, 7.0, 1.0)), 2) + val metrics = new RegressionMetrics(predictionAndObservationWithWeight, false) + assert(metrics.explainedVariance ~== 8.79687 absTol eps, + "explained variance regression score mismatch") + assert(metrics.meanAbsoluteError ~== 0.5 absTol eps, "mean absolute error mismatch") + assert(metrics.meanSquaredError ~== 0.3125 absTol eps, "mean squared error mismatch") + assert(metrics.rootMeanSquaredError ~== 0.55901 absTol eps, + "root mean squared error mismatch") + assert(metrics.r2 ~== 0.95717 absTol eps, "r2 score mismatch") + } + + /** + * The following values are hand calculated using the formula: + * [[https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Reliability_weights]] + * preds = c(2.25, -0.25, 1.75, 7.75) + * obs = c(3.0, -0.5, 2.0, 7.0) + * weights = c(0.1, 0.2, 0.15, 0.05) + * count = 4 + * + * Weighted metrics can be calculated with MultivariateStatisticalSummary. 
+ * (observations, observations - predictions) + * mean (1.7, 0.05) + * variance (7.3, 0.3) + * numNonZeros (0.5, 0.5) + * max (7.0, 0.75) + * min (-0.5, -0.75) + * normL2 (2.0, 0.32596) + * normL1 (1.05, 0.2) + * + * explainedVariance: sum(pow((preds - 1.7),2)*weight) / weightedCount = 5.2425 + * meanAbsoluteError: normL1(1) / weightedCount = 0.4 + * meanSquaredError: pow(normL2(1),2) / weightedCount = 0.2125 + * rootMeanSquaredError: sqrt(meanSquaredError) = 0.46098 + * r2: 1 - pow(normL2(1),2) / (variance(0) * (weightedCount - 1)) = 1.02910 + */ + test("regression metrics with weighted samples") { + val predictionAndObservationWithWeight = sc.parallelize( + Seq((2.25, 3.0, 0.1), (-0.25, -0.5, 0.2), (1.75, 2.0, 0.15), (7.75, 7.0, 0.05)), 2) + val metrics = new RegressionMetrics(predictionAndObservationWithWeight, false) + assert(metrics.explainedVariance ~== 5.2425 absTol eps, + "explained variance regression score mismatch") + assert(metrics.meanAbsoluteError ~== 0.4 absTol eps, "mean absolute error mismatch") + assert(metrics.meanSquaredError ~== 0.2125 absTol eps, "mean squared error mismatch") + assert(metrics.rootMeanSquaredError ~== 0.46098 absTol eps, + "root mean squared error mismatch") + assert(metrics.r2 ~== 1.02910 absTol eps, "r2 score mismatch") + } } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index b3252d70a80c..883913332ca1 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -531,7 +531,10 @@ object MimaExcludes { ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.toDenseColMajor"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.toDenseMatrix"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.toSparseMatrix"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.getSizeInBytes") + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.getSizeInBytes"), + + // [SPARK-18693] Added weightSum to trait MultivariateStatisticalSummary + ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.stat.MultivariateStatisticalSummary.weightSum") ) ++ Seq( // [SPARK-17019] Expose on-heap and off-heap memory usage in various places ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerBlockManagerAdded.copy"), From a63e7b2a212bab94d080b00cf1c5f397800a276a Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Wed, 12 Dec 2018 12:01:21 -0800 Subject: [PATCH 0119/1072] [SPARK-25877][K8S] Move all feature logic to feature classes. This change makes the driver and executor builders much simpler by encapsulating almost all feature logic in the respective feature classes. The only logic that remains in the builders is the creation of the initial pod, which needs to happen before anything else and is therefore better left there. Most feature classes already behave correctly when the configuration has nothing for them to handle, but a few needed minor tweaks; unit tests were updated or added to account for these. The builder suites were simplified considerably and now only test the pod-related code that remains in the builders. Author: Marcelo Vanzin Closes #23220 from vanzin/SPARK-25877.
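For illustration only, the sketch below condenses the new executor-side logic from the KubernetesExecutorBuilder changes in this patch: build the initial pod from the optional template, then fold every feature step over it. Most of the step list is elided, and it assumes the surrounding buildFromFeatures scope (conf, secMgr, client) and the imports of the k8s module; it is not the exact code applied by the diff.

```scala
// Load the initial pod from the optional template, falling back to an empty pod.
val initialPod = conf.get(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE)
  .map { file =>
    KubernetesUtils.loadPodFromTemplate(
      client,
      new File(file),
      conf.get(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_CONTAINER_NAME))
  }
  .getOrElse(SparkPod.initialPod())

// Every feature step is instantiated unconditionally...
val features = Seq(
  new BasicExecutorFeatureStep(conf, secMgr),
  new LocalDirsFeatureStep(conf),
  new HadoopConfExecutorFeatureStep(conf))  // ...remaining steps elided

// ...and each step configures the pod in turn; steps whose configuration is
// absent simply return the pod they were given.
features.foldLeft(initialPod) { case (pod, feature) => feature.configurePod(pod) }
```

Because the steps act as no-ops when their configuration is absent, the conditional wiring that previously lived in the builders is no longer needed.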
--- .../HadoopConfExecutorFeatureStep.scala | 10 +- .../HadoopSparkUserExecutorFeatureStep.scala | 5 +- .../KerberosConfExecutorFeatureStep.scala | 26 +-- .../features/PodTemplateConfigMapStep.scala | 82 +++++--- .../submit/KubernetesClientApplication.scala | 4 +- .../k8s/submit/KubernetesDriverBuilder.scala | 99 +++------ .../cluster/k8s/ExecutorPodsAllocator.scala | 3 +- .../k8s/KubernetesClusterManager.scala | 2 +- .../k8s/KubernetesExecutorBuilder.scala | 100 +++------ .../spark/deploy/k8s/PodBuilderSuite.scala | 177 ++++++++++++++++ .../PodTemplateConfigMapStepSuite.scala | 25 ++- .../spark/deploy/k8s/submit/ClientSuite.scala | 2 +- .../submit/KubernetesDriverBuilderSuite.scala | 194 +----------------- .../k8s/submit/PodBuilderSuiteUtils.scala | 142 ------------- .../k8s/ExecutorPodsAllocatorSuite.scala | 4 +- .../k8s/KubernetesExecutorBuilderSuite.scala | 144 +------------ 16 files changed, 343 insertions(+), 676 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/PodBuilderSuiteUtils.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala index bca66759d586..da332881ae1a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala @@ -31,10 +31,10 @@ private[spark] class HadoopConfExecutorFeatureStep(conf: KubernetesExecutorConf) override def configurePod(pod: SparkPod): SparkPod = { val hadoopConfDirCMapName = conf.getOption(HADOOP_CONFIG_MAP_NAME) - require(hadoopConfDirCMapName.isDefined, - "Ensure that the env `HADOOP_CONF_DIR` is defined either in the client or " + - " using pre-existing ConfigMaps") - logInfo("HADOOP_CONF_DIR defined") - HadoopBootstrapUtil.bootstrapHadoopConfDir(None, None, hadoopConfDirCMapName, pod) + if (hadoopConfDirCMapName.isDefined) { + HadoopBootstrapUtil.bootstrapHadoopConfDir(None, None, hadoopConfDirCMapName, pod) + } else { + pod + } } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala index e34211076319..c038e75491ca 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala @@ -28,7 +28,8 @@ private[spark] class HadoopSparkUserExecutorFeatureStep(conf: KubernetesExecutor extends KubernetesFeatureConfigStep { override def configurePod(pod: SparkPod): SparkPod = { - val sparkUserName = conf.get(KERBEROS_SPARK_USER_NAME) - HadoopBootstrapUtil.bootstrapSparkUserPod(sparkUserName, pod) + conf.getOption(KERBEROS_SPARK_USER_NAME).map { user => + HadoopBootstrapUtil.bootstrapSparkUserPod(user, pod) + }.getOrElse(pod) } } diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala index 32bb6a5d2bcb..907271b1cb48 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala @@ -27,18 +27,20 @@ import org.apache.spark.internal.Logging private[spark] class KerberosConfExecutorFeatureStep(conf: KubernetesExecutorConf) extends KubernetesFeatureConfigStep with Logging { - private val maybeKrb5CMap = conf.getOption(KRB5_CONFIG_MAP_NAME) - require(maybeKrb5CMap.isDefined, "HADOOP_CONF_DIR ConfigMap not found") - override def configurePod(pod: SparkPod): SparkPod = { - logInfo(s"Mounting Resources for Kerberos") - HadoopBootstrapUtil.bootstrapKerberosPod( - conf.get(KERBEROS_DT_SECRET_NAME), - conf.get(KERBEROS_DT_SECRET_KEY), - conf.get(KERBEROS_SPARK_USER_NAME), - None, - None, - maybeKrb5CMap, - pod) + val maybeKrb5CMap = conf.getOption(KRB5_CONFIG_MAP_NAME) + if (maybeKrb5CMap.isDefined) { + logInfo(s"Mounting Resources for Kerberos") + HadoopBootstrapUtil.bootstrapKerberosPod( + conf.get(KERBEROS_DT_SECRET_NAME), + conf.get(KERBEROS_DT_SECRET_KEY), + conf.get(KERBEROS_SPARK_USER_NAME), + None, + None, + maybeKrb5CMap, + pod) + } else { + pod + } } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala index 09dcf93a54f8..7f41ca43589b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala @@ -28,44 +28,60 @@ import org.apache.spark.deploy.k8s.Constants._ private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf) extends KubernetesFeatureConfigStep { + + private val hasTemplate = conf.contains(KUBERNETES_EXECUTOR_PODTEMPLATE_FILE) + def configurePod(pod: SparkPod): SparkPod = { - val podWithVolume = new PodBuilder(pod.pod) - .editSpec() - .addNewVolume() - .withName(POD_TEMPLATE_VOLUME) - .withNewConfigMap() - .withName(POD_TEMPLATE_CONFIGMAP) - .addNewItem() - .withKey(POD_TEMPLATE_KEY) - .withPath(EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME) - .endItem() - .endConfigMap() - .endVolume() - .endSpec() - .build() + if (hasTemplate) { + val podWithVolume = new PodBuilder(pod.pod) + .editSpec() + .addNewVolume() + .withName(POD_TEMPLATE_VOLUME) + .withNewConfigMap() + .withName(POD_TEMPLATE_CONFIGMAP) + .addNewItem() + .withKey(POD_TEMPLATE_KEY) + .withPath(EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME) + .endItem() + .endConfigMap() + .endVolume() + .endSpec() + .build() - val containerWithVolume = new ContainerBuilder(pod.container) - .addNewVolumeMount() - .withName(POD_TEMPLATE_VOLUME) - .withMountPath(EXECUTOR_POD_SPEC_TEMPLATE_MOUNTPATH) - .endVolumeMount() - .build() - SparkPod(podWithVolume, containerWithVolume) + val containerWithVolume = new ContainerBuilder(pod.container) + .addNewVolumeMount() + .withName(POD_TEMPLATE_VOLUME) + .withMountPath(EXECUTOR_POD_SPEC_TEMPLATE_MOUNTPATH) + .endVolumeMount() + .build() + 
SparkPod(podWithVolume, containerWithVolume) + } else { + pod + } } - override def getAdditionalPodSystemProperties(): Map[String, String] = Map[String, String]( - KUBERNETES_EXECUTOR_PODTEMPLATE_FILE.key -> - (EXECUTOR_POD_SPEC_TEMPLATE_MOUNTPATH + "/" + EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME)) + override def getAdditionalPodSystemProperties(): Map[String, String] = { + if (hasTemplate) { + Map[String, String]( + KUBERNETES_EXECUTOR_PODTEMPLATE_FILE.key -> + (EXECUTOR_POD_SPEC_TEMPLATE_MOUNTPATH + "/" + EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME)) + } else { + Map.empty + } + } override def getAdditionalKubernetesResources(): Seq[HasMetadata] = { - require(conf.get(KUBERNETES_EXECUTOR_PODTEMPLATE_FILE).isDefined) - val podTemplateFile = conf.get(KUBERNETES_EXECUTOR_PODTEMPLATE_FILE).get - val podTemplateString = Files.toString(new File(podTemplateFile), StandardCharsets.UTF_8) - Seq(new ConfigMapBuilder() - .withNewMetadata() - .withName(POD_TEMPLATE_CONFIGMAP) - .endMetadata() - .addToData(POD_TEMPLATE_KEY, podTemplateString) - .build()) + if (hasTemplate) { + val podTemplateFile = conf.get(KUBERNETES_EXECUTOR_PODTEMPLATE_FILE).get + val podTemplateString = Files.toString(new File(podTemplateFile), StandardCharsets.UTF_8) + Seq(new ConfigMapBuilder() + .withNewMetadata() + .withName(POD_TEMPLATE_CONFIGMAP) + .endMetadata() + .addToData(POD_TEMPLATE_KEY, podTemplateString) + .build()) + } else { + Nil + } } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala index 70a93c968795..3888778bf84c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala @@ -104,7 +104,7 @@ private[spark] class Client( watcher: LoggingPodStatusWatcher) extends Logging { def run(): Unit = { - val resolvedDriverSpec = builder.buildFromFeatures(conf) + val resolvedDriverSpec = builder.buildFromFeatures(conf, kubernetesClient) val configMapName = s"${conf.resourceNamePrefix}-driver-conf-map" val configMap = buildConfigMap(configMapName, resolvedDriverSpec.systemProperties) // The include of the ENV_VAR for "SPARK_CONF_DIR" is to allow for the @@ -232,7 +232,7 @@ private[spark] class KubernetesClientApplication extends SparkApplication { None)) { kubernetesClient => val client = new Client( kubernetesConf, - KubernetesDriverBuilder(kubernetesClient, kubernetesConf.sparkConf), + new KubernetesDriverBuilder(), kubernetesClient, waitForAppCompletion, watcher) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala index a5ad9729aee9..d2c0ced9fa2f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala @@ -20,90 +20,49 @@ import java.io.File import io.fabric8.kubernetes.client.KubernetesClient -import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.features._ -private[spark] class KubernetesDriverBuilder( - 
provideBasicStep: (KubernetesDriverConf => BasicDriverFeatureStep) = - new BasicDriverFeatureStep(_), - provideCredentialsStep: (KubernetesDriverConf => DriverKubernetesCredentialsFeatureStep) = - new DriverKubernetesCredentialsFeatureStep(_), - provideServiceStep: (KubernetesDriverConf => DriverServiceFeatureStep) = - new DriverServiceFeatureStep(_), - provideSecretsStep: (KubernetesConf => MountSecretsFeatureStep) = - new MountSecretsFeatureStep(_), - provideEnvSecretsStep: (KubernetesConf => EnvSecretsFeatureStep) = - new EnvSecretsFeatureStep(_), - provideLocalDirsStep: (KubernetesConf => LocalDirsFeatureStep) = - new LocalDirsFeatureStep(_), - provideVolumesStep: (KubernetesConf => MountVolumesFeatureStep) = - new MountVolumesFeatureStep(_), - provideDriverCommandStep: (KubernetesDriverConf => DriverCommandFeatureStep) = - new DriverCommandFeatureStep(_), - provideHadoopGlobalStep: (KubernetesDriverConf => KerberosConfDriverFeatureStep) = - new KerberosConfDriverFeatureStep(_), - providePodTemplateConfigMapStep: (KubernetesConf => PodTemplateConfigMapStep) = - new PodTemplateConfigMapStep(_), - provideInitialPod: () => SparkPod = () => SparkPod.initialPod) { +private[spark] class KubernetesDriverBuilder { - def buildFromFeatures(kubernetesConf: KubernetesDriverConf): KubernetesDriverSpec = { - val baseFeatures = Seq( - provideBasicStep(kubernetesConf), - provideCredentialsStep(kubernetesConf), - provideServiceStep(kubernetesConf), - provideLocalDirsStep(kubernetesConf)) - - val secretFeature = if (kubernetesConf.secretNamesToMountPaths.nonEmpty) { - Seq(provideSecretsStep(kubernetesConf)) - } else Nil - val envSecretFeature = if (kubernetesConf.secretEnvNamesToKeyRefs.nonEmpty) { - Seq(provideEnvSecretsStep(kubernetesConf)) - } else Nil - val volumesFeature = if (kubernetesConf.volumes.nonEmpty) { - Seq(provideVolumesStep(kubernetesConf)) - } else Nil - val podTemplateFeature = if ( - kubernetesConf.get(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE).isDefined) { - Seq(providePodTemplateConfigMapStep(kubernetesConf)) - } else Nil - - val driverCommandStep = provideDriverCommandStep(kubernetesConf) - - val hadoopConfigStep = Some(provideHadoopGlobalStep(kubernetesConf)) + def buildFromFeatures( + conf: KubernetesDriverConf, + client: KubernetesClient): KubernetesDriverSpec = { + val initialPod = conf.get(Config.KUBERNETES_DRIVER_PODTEMPLATE_FILE) + .map { file => + KubernetesUtils.loadPodFromTemplate( + client, + new File(file), + conf.get(Config.KUBERNETES_DRIVER_PODTEMPLATE_CONTAINER_NAME)) + } + .getOrElse(SparkPod.initialPod()) - val allFeatures: Seq[KubernetesFeatureConfigStep] = - baseFeatures ++ Seq(driverCommandStep) ++ - secretFeature ++ envSecretFeature ++ volumesFeature ++ - hadoopConfigStep ++ podTemplateFeature + val features = Seq( + new BasicDriverFeatureStep(conf), + new DriverKubernetesCredentialsFeatureStep(conf), + new DriverServiceFeatureStep(conf), + new MountSecretsFeatureStep(conf), + new EnvSecretsFeatureStep(conf), + new LocalDirsFeatureStep(conf), + new MountVolumesFeatureStep(conf), + new DriverCommandFeatureStep(conf), + new KerberosConfDriverFeatureStep(conf), + new PodTemplateConfigMapStep(conf)) - var spec = KubernetesDriverSpec( - provideInitialPod(), + val spec = KubernetesDriverSpec( + initialPod, driverKubernetesResources = Seq.empty, - kubernetesConf.sparkConf.getAll.toMap) - for (feature <- allFeatures) { + conf.sparkConf.getAll.toMap) + + features.foldLeft(spec) { case (spec, feature) => val configuredPod = feature.configurePod(spec.pod) val 
addedSystemProperties = feature.getAdditionalPodSystemProperties() val addedResources = feature.getAdditionalKubernetesResources() - spec = KubernetesDriverSpec( + KubernetesDriverSpec( configuredPod, spec.driverKubernetesResources ++ addedResources, spec.systemProperties ++ addedSystemProperties) } - spec } -} -private[spark] object KubernetesDriverBuilder { - def apply(kubernetesClient: KubernetesClient, conf: SparkConf): KubernetesDriverBuilder = { - conf.get(Config.KUBERNETES_DRIVER_PODTEMPLATE_FILE) - .map(new File(_)) - .map(file => new KubernetesDriverBuilder(provideInitialPod = () => - KubernetesUtils.loadPodFromTemplate( - kubernetesClient, - file, - conf.get(Config.KUBERNETES_DRIVER_PODTEMPLATE_CONTAINER_NAME)) - )) - .getOrElse(new KubernetesDriverBuilder()) - } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index ac42554b1334..da3edfeca9b1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -136,7 +136,8 @@ private[spark] class ExecutorPodsAllocator( newExecutorId.toString, applicationId, driverPod) - val executorPod = executorBuilder.buildFromFeatures(executorConf, secMgr) + val executorPod = executorBuilder.buildFromFeatures(executorConf, secMgr, + kubernetesClient) val podWithAttachedContainer = new PodBuilder(executorPod.pod) .editOrNewSpec() .addToContainers(executorPod.container) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index b31fbb420ed6..809bdf8ca8c2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -95,7 +95,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit val executorPodsAllocator = new ExecutorPodsAllocator( sc.conf, sc.env.securityManager, - KubernetesExecutorBuilder(kubernetesClient, sc.conf), + new KubernetesExecutorBuilder(), kubernetesClient, snapshotsStore, new SystemClock()) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala index ba273cad6a8e..0b74966fe868 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala @@ -20,86 +20,36 @@ import java.io.File import io.fabric8.kubernetes.client.KubernetesClient -import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.SecurityManager import org.apache.spark.deploy.k8s._ -import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.features._ -private[spark] class KubernetesExecutorBuilder( - provideBasicStep: 
(KubernetesExecutorConf, SecurityManager) => BasicExecutorFeatureStep = - new BasicExecutorFeatureStep(_, _), - provideSecretsStep: (KubernetesConf => MountSecretsFeatureStep) = - new MountSecretsFeatureStep(_), - provideEnvSecretsStep: (KubernetesConf => EnvSecretsFeatureStep) = - new EnvSecretsFeatureStep(_), - provideLocalDirsStep: (KubernetesConf => LocalDirsFeatureStep) = - new LocalDirsFeatureStep(_), - provideVolumesStep: (KubernetesConf => MountVolumesFeatureStep) = - new MountVolumesFeatureStep(_), - provideHadoopConfStep: (KubernetesExecutorConf => HadoopConfExecutorFeatureStep) = - new HadoopConfExecutorFeatureStep(_), - provideKerberosConfStep: (KubernetesExecutorConf => KerberosConfExecutorFeatureStep) = - new KerberosConfExecutorFeatureStep(_), - provideHadoopSparkUserStep: (KubernetesExecutorConf => HadoopSparkUserExecutorFeatureStep) = - new HadoopSparkUserExecutorFeatureStep(_), - provideInitialPod: () => SparkPod = () => SparkPod.initialPod()) { +private[spark] class KubernetesExecutorBuilder { def buildFromFeatures( - kubernetesConf: KubernetesExecutorConf, - secMgr: SecurityManager): SparkPod = { - val sparkConf = kubernetesConf.sparkConf - val maybeHadoopConfigMap = sparkConf.getOption(HADOOP_CONFIG_MAP_NAME) - val maybeDTSecretName = sparkConf.getOption(KERBEROS_DT_SECRET_NAME) - val maybeDTDataItem = sparkConf.getOption(KERBEROS_DT_SECRET_KEY) - - val baseFeatures = Seq(provideBasicStep(kubernetesConf, secMgr), - provideLocalDirsStep(kubernetesConf)) - val secretFeature = if (kubernetesConf.secretNamesToMountPaths.nonEmpty) { - Seq(provideSecretsStep(kubernetesConf)) - } else Nil - val secretEnvFeature = if (kubernetesConf.secretEnvNamesToKeyRefs.nonEmpty) { - Seq(provideEnvSecretsStep(kubernetesConf)) - } else Nil - val volumesFeature = if (kubernetesConf.volumes.nonEmpty) { - Seq(provideVolumesStep(kubernetesConf)) - } else Nil - - val maybeHadoopConfFeatureSteps = maybeHadoopConfigMap.map { _ => - val maybeKerberosStep = - if (maybeDTSecretName.isDefined && maybeDTDataItem.isDefined) { - provideKerberosConfStep(kubernetesConf) - } else { - provideHadoopSparkUserStep(kubernetesConf) - } - Seq(provideHadoopConfStep(kubernetesConf)) :+ - maybeKerberosStep - }.getOrElse(Seq.empty[KubernetesFeatureConfigStep]) - - val allFeatures: Seq[KubernetesFeatureConfigStep] = - baseFeatures ++ - secretFeature ++ - secretEnvFeature ++ - volumesFeature ++ - maybeHadoopConfFeatureSteps - - var executorPod = provideInitialPod() - for (feature <- allFeatures) { - executorPod = feature.configurePod(executorPod) - } - executorPod + conf: KubernetesExecutorConf, + secMgr: SecurityManager, + client: KubernetesClient): SparkPod = { + val initialPod = conf.get(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE) + .map { file => + KubernetesUtils.loadPodFromTemplate( + client, + new File(file), + conf.get(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_CONTAINER_NAME)) + } + .getOrElse(SparkPod.initialPod()) + + val features = Seq( + new BasicExecutorFeatureStep(conf, secMgr), + new MountSecretsFeatureStep(conf), + new EnvSecretsFeatureStep(conf), + new LocalDirsFeatureStep(conf), + new MountVolumesFeatureStep(conf), + new HadoopConfExecutorFeatureStep(conf), + new KerberosConfExecutorFeatureStep(conf), + new HadoopSparkUserExecutorFeatureStep(conf)) + + features.foldLeft(initialPod) { case (pod, feature) => feature.configurePod(pod) } } -} -private[spark] object KubernetesExecutorBuilder { - def apply(kubernetesClient: KubernetesClient, conf: SparkConf): KubernetesExecutorBuilder = { - 
conf.get(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE) - .map(new File(_)) - .map(file => new KubernetesExecutorBuilder(provideInitialPod = () => - KubernetesUtils.loadPodFromTemplate( - kubernetesClient, - file, - conf.get(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_CONTAINER_NAME)) - )) - .getOrElse(new KubernetesExecutorBuilder()) - } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala new file mode 100644 index 000000000000..7dde0c137716 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s + +import java.io.File + +import io.fabric8.kubernetes.api.model.{Config => _, _} +import io.fabric8.kubernetes.client.KubernetesClient +import io.fabric8.kubernetes.client.dsl.{MixedOperation, PodResource} +import org.mockito.Matchers.any +import org.mockito.Mockito.{mock, never, verify, when} +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} +import org.apache.spark.deploy.k8s._ +import org.apache.spark.internal.config.ConfigEntry + +abstract class PodBuilderSuite extends SparkFunSuite { + + protected def templateFileConf: ConfigEntry[_] + + protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod + + private val baseConf = new SparkConf(false) + .set(Config.CONTAINER_IMAGE, "spark-executor:latest") + + test("use empty initial pod if template is not specified") { + val client = mock(classOf[KubernetesClient]) + buildPod(baseConf.clone(), client) + verify(client, never()).pods() + } + + test("load pod template if specified") { + val client = mockKubernetesClient() + val sparkConf = baseConf.clone().set(templateFileConf.key, "template-file.yaml") + val pod = buildPod(sparkConf, client) + verifyPod(pod) + } + + test("complain about misconfigured pod template") { + val client = mockKubernetesClient( + new PodBuilder() + .withNewMetadata() + .addToLabels("test-label-key", "test-label-value") + .endMetadata() + .build()) + val sparkConf = baseConf.clone().set(templateFileConf.key, "template-file.yaml") + val exception = intercept[SparkException] { + buildPod(sparkConf, client) + } + assert(exception.getMessage.contains("Could not load pod from template file.")) + } + + private def mockKubernetesClient(pod: Pod = podWithSupportedFeatures()): KubernetesClient = { + val kubernetesClient = mock(classOf[KubernetesClient]) + val pods = + mock(classOf[MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]]]) + val podResource = 
mock(classOf[PodResource[Pod, DoneablePod]]) + when(kubernetesClient.pods()).thenReturn(pods) + when(pods.load(any(classOf[File]))).thenReturn(podResource) + when(podResource.get()).thenReturn(pod) + kubernetesClient + } + + private def verifyPod(pod: SparkPod): Unit = { + val metadata = pod.pod.getMetadata + assert(metadata.getLabels.containsKey("test-label-key")) + assert(metadata.getAnnotations.containsKey("test-annotation-key")) + assert(metadata.getNamespace === "namespace") + assert(metadata.getOwnerReferences.asScala.exists(_.getName == "owner-reference")) + val spec = pod.pod.getSpec + assert(!spec.getContainers.asScala.exists(_.getName == "executor-container")) + assert(spec.getDnsPolicy === "dns-policy") + assert(spec.getHostAliases.asScala.exists(_.getHostnames.asScala.exists(_ == "hostname"))) + assert(spec.getImagePullSecrets.asScala.exists(_.getName == "local-reference")) + assert(spec.getInitContainers.asScala.exists(_.getName == "init-container")) + assert(spec.getNodeName == "node-name") + assert(spec.getNodeSelector.get("node-selector-key") === "node-selector-value") + assert(spec.getSchedulerName === "scheduler") + assert(spec.getSecurityContext.getRunAsUser === 1000L) + assert(spec.getServiceAccount === "service-account") + assert(spec.getSubdomain === "subdomain") + assert(spec.getTolerations.asScala.exists(_.getKey == "toleration-key")) + assert(spec.getVolumes.asScala.exists(_.getName == "test-volume")) + val container = pod.container + assert(container.getName === "executor-container") + assert(container.getArgs.contains("arg")) + assert(container.getCommand.equals(List("command").asJava)) + assert(container.getEnv.asScala.exists(_.getName == "env-key")) + assert(container.getResources.getLimits.get("gpu") === + new QuantityBuilder().withAmount("1").build()) + assert(container.getSecurityContext.getRunAsNonRoot) + assert(container.getStdin) + assert(container.getTerminationMessagePath === "termination-message-path") + assert(container.getTerminationMessagePolicy === "termination-message-policy") + assert(pod.container.getVolumeMounts.asScala.exists(_.getName == "test-volume")) + } + + private def podWithSupportedFeatures(): Pod = { + new PodBuilder() + .withNewMetadata() + .addToLabels("test-label-key", "test-label-value") + .addToAnnotations("test-annotation-key", "test-annotation-value") + .withNamespace("namespace") + .addNewOwnerReference() + .withController(true) + .withName("owner-reference") + .endOwnerReference() + .endMetadata() + .withNewSpec() + .withDnsPolicy("dns-policy") + .withHostAliases(new HostAliasBuilder().withHostnames("hostname").build()) + .withImagePullSecrets( + new LocalObjectReferenceBuilder().withName("local-reference").build()) + .withInitContainers(new ContainerBuilder().withName("init-container").build()) + .withNodeName("node-name") + .withNodeSelector(Map("node-selector-key" -> "node-selector-value").asJava) + .withSchedulerName("scheduler") + .withNewSecurityContext() + .withRunAsUser(1000L) + .endSecurityContext() + .withServiceAccount("service-account") + .withSubdomain("subdomain") + .withTolerations(new TolerationBuilder() + .withKey("toleration-key") + .withOperator("Equal") + .withEffect("NoSchedule") + .build()) + .addNewVolume() + .withNewHostPath() + .withPath("/test") + .endHostPath() + .withName("test-volume") + .endVolume() + .addNewContainer() + .withArgs("arg") + .withCommand("command") + .addNewEnv() + .withName("env-key") + .withValue("env-value") + .endEnv() + .withImagePullPolicy("Always") + 
.withName("executor-container") + .withNewResources() + .withLimits(Map("gpu" -> new QuantityBuilder().withAmount("1").build()).asJava) + .endResources() + .withNewSecurityContext() + .withRunAsNonRoot(true) + .endSecurityContext() + .withStdin(true) + .withTerminationMessagePath("termination-message-path") + .withTerminationMessagePolicy("termination-message-policy") + .addToVolumeMounts( + new VolumeMountBuilder() + .withName("test-volume") + .withMountPath("/test") + .build()) + .endContainer() + .endSpec() + .build() + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala index 7295b82ca479..5e7388dc8e67 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala @@ -20,25 +20,32 @@ import java.io.{File, PrintWriter} import java.nio.file.Files import io.fabric8.kubernetes.api.model.ConfigMap -import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ -class PodTemplateConfigMapStepSuite extends SparkFunSuite with BeforeAndAfter { - private var kubernetesConf : KubernetesConf = _ - private var templateFile: File = _ +class PodTemplateConfigMapStepSuite extends SparkFunSuite { - before { - templateFile = Files.createTempFile("pod-template", "yml").toFile + test("Do nothing when executor template is not specified") { + val conf = KubernetesTestConf.createDriverConf() + val step = new PodTemplateConfigMapStep(conf) + + val initialPod = SparkPod.initialPod() + val configuredPod = step.configurePod(initialPod) + assert(configuredPod === initialPod) + + assert(step.getAdditionalKubernetesResources().isEmpty) + assert(step.getAdditionalPodSystemProperties().isEmpty) + } + + test("Mounts executor template volume if config specified") { + val templateFile = Files.createTempFile("pod-template", "yml").toFile templateFile.deleteOnExit() val sparkConf = new SparkConf(false) .set(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE, templateFile.getAbsolutePath) - kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) - } + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) - test("Mounts executor template volume if config specified") { val writer = new PrintWriter(templateFile) writer.write("pod-template-contents") writer.close() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala index e9c05fef6f5d..1bb926cbca23 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala @@ -126,7 +126,7 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { MockitoAnnotations.initMocks(this) kconf = KubernetesTestConf.createDriverConf( resourceNamePrefix = Some(KUBERNETES_RESOURCE_PREFIX)) - when(driverBuilder.buildFromFeatures(kconf)).thenReturn(BUILT_KUBERNETES_SPEC) + when(driverBuilder.buildFromFeatures(kconf, kubernetesClient)).thenReturn(BUILT_KUBERNETES_SPEC) 
when(kubernetesClient.pods()).thenReturn(podOperations) when(podOperations.withName(POD_NAME)).thenReturn(namedPods) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala index 7e7dc4763c2e..6518c91a1a1f 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala @@ -16,201 +16,21 @@ */ package org.apache.spark.deploy.k8s.submit -import io.fabric8.kubernetes.api.model.PodBuilder import io.fabric8.kubernetes.client.KubernetesClient -import org.mockito.Mockito._ -import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} +import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s._ -import org.apache.spark.deploy.k8s.Config.{CONTAINER_IMAGE, KUBERNETES_DRIVER_PODTEMPLATE_FILE, KUBERNETES_EXECUTOR_PODTEMPLATE_FILE} -import org.apache.spark.deploy.k8s.features._ +import org.apache.spark.internal.config.ConfigEntry -class KubernetesDriverBuilderSuite extends SparkFunSuite { +class KubernetesDriverBuilderSuite extends PodBuilderSuite { - private val BASIC_STEP_TYPE = "basic" - private val CREDENTIALS_STEP_TYPE = "credentials" - private val SERVICE_STEP_TYPE = "service" - private val LOCAL_DIRS_STEP_TYPE = "local-dirs" - private val SECRETS_STEP_TYPE = "mount-secrets" - private val DRIVER_CMD_STEP_TYPE = "driver-command" - private val ENV_SECRETS_STEP_TYPE = "env-secrets" - private val HADOOP_GLOBAL_STEP_TYPE = "hadoop-global" - private val MOUNT_VOLUMES_STEP_TYPE = "mount-volumes" - private val TEMPLATE_VOLUME_STEP_TYPE = "template-volume" - - private val basicFeatureStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - BASIC_STEP_TYPE, classOf[BasicDriverFeatureStep]) - - private val credentialsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - CREDENTIALS_STEP_TYPE, classOf[DriverKubernetesCredentialsFeatureStep]) - - private val serviceStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - SERVICE_STEP_TYPE, classOf[DriverServiceFeatureStep]) - - private val localDirsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - LOCAL_DIRS_STEP_TYPE, classOf[LocalDirsFeatureStep]) - - private val secretsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - SECRETS_STEP_TYPE, classOf[MountSecretsFeatureStep]) - - private val driverCommandStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - DRIVER_CMD_STEP_TYPE, classOf[DriverCommandFeatureStep]) - - private val envSecretsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - ENV_SECRETS_STEP_TYPE, classOf[EnvSecretsFeatureStep]) - - private val hadoopGlobalStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - HADOOP_GLOBAL_STEP_TYPE, classOf[KerberosConfDriverFeatureStep]) - - private val mountVolumesStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - MOUNT_VOLUMES_STEP_TYPE, classOf[MountVolumesFeatureStep]) - - private val templateVolumeStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - TEMPLATE_VOLUME_STEP_TYPE, classOf[PodTemplateConfigMapStep] - ) - - private val builderUnderTest: KubernetesDriverBuilder = - new KubernetesDriverBuilder( - _ => basicFeatureStep, - _ => credentialsStep, - _ => serviceStep, - 
_ => secretsStep, - _ => envSecretsStep, - _ => localDirsStep, - _ => mountVolumesStep, - _ => driverCommandStep, - _ => hadoopGlobalStep, - _ => templateVolumeStep) - - test("Apply fundamental steps all the time.") { - val conf = KubernetesTestConf.createDriverConf() - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), - BASIC_STEP_TYPE, - CREDENTIALS_STEP_TYPE, - SERVICE_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, - DRIVER_CMD_STEP_TYPE, - HADOOP_GLOBAL_STEP_TYPE) + override protected def templateFileConf: ConfigEntry[_] = { + Config.KUBERNETES_DRIVER_PODTEMPLATE_FILE } - test("Apply secrets step if secrets are present.") { - val conf = KubernetesTestConf.createDriverConf( - secretEnvNamesToKeyRefs = Map("EnvName" -> "SecretName:secretKey"), - secretNamesToMountPaths = Map("secret" -> "secretMountPath")) - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), - BASIC_STEP_TYPE, - CREDENTIALS_STEP_TYPE, - SERVICE_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, - SECRETS_STEP_TYPE, - ENV_SECRETS_STEP_TYPE, - DRIVER_CMD_STEP_TYPE, - HADOOP_GLOBAL_STEP_TYPE) - } - - test("Apply volumes step if mounts are present.") { - val volumeSpec = KubernetesVolumeSpec( - "volume", - "/tmp", - "", - false, - KubernetesHostPathVolumeConf("/path")) - val conf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeSpec)) - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), - BASIC_STEP_TYPE, - CREDENTIALS_STEP_TYPE, - SERVICE_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, - MOUNT_VOLUMES_STEP_TYPE, - DRIVER_CMD_STEP_TYPE, - HADOOP_GLOBAL_STEP_TYPE) - } - - test("Apply volumes step if a mount subpath is present.") { - val volumeSpec = KubernetesVolumeSpec( - "volume", - "/tmp", - "foo", - false, - KubernetesHostPathVolumeConf("/path")) - val conf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeSpec)) - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), - BASIC_STEP_TYPE, - CREDENTIALS_STEP_TYPE, - SERVICE_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, - MOUNT_VOLUMES_STEP_TYPE, - DRIVER_CMD_STEP_TYPE, - HADOOP_GLOBAL_STEP_TYPE) - } - - test("Apply template volume step if executor template is present.") { - val sparkConf = new SparkConf(false) - .set(KUBERNETES_EXECUTOR_PODTEMPLATE_FILE, "filename") + override protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod = { val conf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf), - BASIC_STEP_TYPE, - CREDENTIALS_STEP_TYPE, - SERVICE_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, - DRIVER_CMD_STEP_TYPE, - HADOOP_GLOBAL_STEP_TYPE, - TEMPLATE_VOLUME_STEP_TYPE) - } - - private def validateStepTypesApplied(resolvedSpec: KubernetesDriverSpec, stepTypes: String*) - : Unit = { - val addedProperties = resolvedSpec.systemProperties - .filter { case (k, _) => !k.startsWith("spark.") } - .toMap - assert(addedProperties.keys.toSet === stepTypes.toSet) - stepTypes.foreach { stepType => - assert(resolvedSpec.pod.pod.getMetadata.getLabels.get(stepType) === stepType) - assert(resolvedSpec.driverKubernetesResources.containsSlice( - KubernetesFeaturesTestUtils.getSecretsForStepType(stepType))) - assert(resolvedSpec.systemProperties(stepType) === stepType) - } - } - - test("Start with empty pod if template is not specified") { - val kubernetesClient = mock(classOf[KubernetesClient]) - val driverBuilder = KubernetesDriverBuilder.apply(kubernetesClient, new SparkConf()) - verify(kubernetesClient, never()).pods() + new 
KubernetesDriverBuilder().buildFromFeatures(conf, client).pod } - test("Starts with template if specified") { - val kubernetesClient = PodBuilderSuiteUtils.loadingMockKubernetesClient() - val sparkConf = new SparkConf(false) - .set(CONTAINER_IMAGE, "spark-driver:latest") - .set(KUBERNETES_DRIVER_PODTEMPLATE_FILE, "template-file.yaml") - val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) - val driverSpec = KubernetesDriverBuilder - .apply(kubernetesClient, sparkConf) - .buildFromFeatures(kubernetesConf) - PodBuilderSuiteUtils.verifyPodWithSupportedFeatures(driverSpec.pod) - } - - test("Throws on misconfigured pod template") { - val kubernetesClient = PodBuilderSuiteUtils.loadingMockKubernetesClient( - new PodBuilder() - .withNewMetadata() - .addToLabels("test-label-key", "test-label-value") - .endMetadata() - .build()) - val sparkConf = new SparkConf(false) - .set(CONTAINER_IMAGE, "spark-driver:latest") - .set(KUBERNETES_DRIVER_PODTEMPLATE_FILE, "template-file.yaml") - val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) - val exception = intercept[SparkException] { - KubernetesDriverBuilder - .apply(kubernetesClient, sparkConf) - .buildFromFeatures(kubernetesConf) - } - assert(exception.getMessage.contains("Could not load pod from template file.")) - } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/PodBuilderSuiteUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/PodBuilderSuiteUtils.scala deleted file mode 100644 index c92e9e6e3b6b..000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/PodBuilderSuiteUtils.scala +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.k8s.submit - -import java.io.File - -import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.client.KubernetesClient -import io.fabric8.kubernetes.client.dsl.{MixedOperation, PodResource} -import org.mockito.Matchers.any -import org.mockito.Mockito.{mock, when} -import org.scalatest.FlatSpec -import scala.collection.JavaConverters._ - -import org.apache.spark.deploy.k8s.SparkPod - -object PodBuilderSuiteUtils extends FlatSpec { - - def loadingMockKubernetesClient(pod: Pod = podWithSupportedFeatures()): KubernetesClient = { - val kubernetesClient = mock(classOf[KubernetesClient]) - val pods = - mock(classOf[MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]]]) - val podResource = mock(classOf[PodResource[Pod, DoneablePod]]) - when(kubernetesClient.pods()).thenReturn(pods) - when(pods.load(any(classOf[File]))).thenReturn(podResource) - when(podResource.get()).thenReturn(pod) - kubernetesClient - } - - def verifyPodWithSupportedFeatures(pod: SparkPod): Unit = { - val metadata = pod.pod.getMetadata - assert(metadata.getLabels.containsKey("test-label-key")) - assert(metadata.getAnnotations.containsKey("test-annotation-key")) - assert(metadata.getNamespace === "namespace") - assert(metadata.getOwnerReferences.asScala.exists(_.getName == "owner-reference")) - val spec = pod.pod.getSpec - assert(!spec.getContainers.asScala.exists(_.getName == "executor-container")) - assert(spec.getDnsPolicy === "dns-policy") - assert(spec.getHostAliases.asScala.exists(_.getHostnames.asScala.exists(_ == "hostname"))) - assert(spec.getImagePullSecrets.asScala.exists(_.getName == "local-reference")) - assert(spec.getInitContainers.asScala.exists(_.getName == "init-container")) - assert(spec.getNodeName == "node-name") - assert(spec.getNodeSelector.get("node-selector-key") === "node-selector-value") - assert(spec.getSchedulerName === "scheduler") - assert(spec.getSecurityContext.getRunAsUser === 1000L) - assert(spec.getServiceAccount === "service-account") - assert(spec.getSubdomain === "subdomain") - assert(spec.getTolerations.asScala.exists(_.getKey == "toleration-key")) - assert(spec.getVolumes.asScala.exists(_.getName == "test-volume")) - val container = pod.container - assert(container.getName === "executor-container") - assert(container.getArgs.contains("arg")) - assert(container.getCommand.equals(List("command").asJava)) - assert(container.getEnv.asScala.exists(_.getName == "env-key")) - assert(container.getResources.getLimits.get("gpu") === - new QuantityBuilder().withAmount("1").build()) - assert(container.getSecurityContext.getRunAsNonRoot) - assert(container.getStdin) - assert(container.getTerminationMessagePath === "termination-message-path") - assert(container.getTerminationMessagePolicy === "termination-message-policy") - assert(pod.container.getVolumeMounts.asScala.exists(_.getName == "test-volume")) - - } - - - def podWithSupportedFeatures(): Pod = new PodBuilder() - .withNewMetadata() - .addToLabels("test-label-key", "test-label-value") - .addToAnnotations("test-annotation-key", "test-annotation-value") - .withNamespace("namespace") - .addNewOwnerReference() - .withController(true) - .withName("owner-reference") - .endOwnerReference() - .endMetadata() - .withNewSpec() - .withDnsPolicy("dns-policy") - .withHostAliases(new HostAliasBuilder().withHostnames("hostname").build()) - .withImagePullSecrets( - new LocalObjectReferenceBuilder().withName("local-reference").build()) - .withInitContainers(new 
ContainerBuilder().withName("init-container").build()) - .withNodeName("node-name") - .withNodeSelector(Map("node-selector-key" -> "node-selector-value").asJava) - .withSchedulerName("scheduler") - .withNewSecurityContext() - .withRunAsUser(1000L) - .endSecurityContext() - .withServiceAccount("service-account") - .withSubdomain("subdomain") - .withTolerations(new TolerationBuilder() - .withKey("toleration-key") - .withOperator("Equal") - .withEffect("NoSchedule") - .build()) - .addNewVolume() - .withNewHostPath() - .withPath("/test") - .endHostPath() - .withName("test-volume") - .endVolume() - .addNewContainer() - .withArgs("arg") - .withCommand("command") - .addNewEnv() - .withName("env-key") - .withValue("env-value") - .endEnv() - .withImagePullPolicy("Always") - .withName("executor-container") - .withNewResources() - .withLimits(Map("gpu" -> new QuantityBuilder().withAmount("1").build()).asJava) - .endResources() - .withNewSecurityContext() - .withRunAsNonRoot(true) - .endSecurityContext() - .withStdin(true) - .withTerminationMessagePath("termination-message-path") - .withTerminationMessagePolicy("termination-message-policy") - .addToVolumeMounts( - new VolumeMountBuilder() - .withName("test-volume") - .withMountPath("/test") - .build()) - .endContainer() - .endSpec() - .build() - -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index d4fa31af3d5c..278a3821a6f3 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -80,8 +80,8 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { when(kubernetesClient.pods()).thenReturn(podOperations) when(podOperations.withName(driverPodName)).thenReturn(driverPodOperations) when(driverPodOperations.get).thenReturn(driverPod) - when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]), meq(secMgr))) - .thenAnswer(executorPodAnswer()) + when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]), meq(secMgr), + meq(kubernetesClient))).thenAnswer(executorPodAnswer()) snapshotsStore = new DeterministicExecutorPodsSnapshotsStore() waitForExecutorPodsClock = new ManualClock(0L) podsAllocatorUnderTest = new ExecutorPodsAllocator( diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala index ef521fd801e9..bd716174a827 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala @@ -16,147 +16,23 @@ */ package org.apache.spark.scheduler.cluster.k8s -import scala.collection.JavaConverters._ - -import io.fabric8.kubernetes.api.model.{Config => _, _} import io.fabric8.kubernetes.client.KubernetesClient -import org.mockito.Mockito.{mock, never, verify} -import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} +import org.apache.spark.{SecurityManager, SparkConf} import 
org.apache.spark.deploy.k8s._ -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.features._ -import org.apache.spark.deploy.k8s.submit.PodBuilderSuiteUtils -import org.apache.spark.util.SparkConfWithEnv - -class KubernetesExecutorBuilderSuite extends SparkFunSuite { - private val BASIC_STEP_TYPE = "basic" - private val SECRETS_STEP_TYPE = "mount-secrets" - private val ENV_SECRETS_STEP_TYPE = "env-secrets" - private val LOCAL_DIRS_STEP_TYPE = "local-dirs" - private val HADOOP_CONF_STEP_TYPE = "hadoop-conf-step" - private val HADOOP_SPARK_USER_STEP_TYPE = "hadoop-spark-user" - private val KERBEROS_CONF_STEP_TYPE = "kerberos-step" - private val MOUNT_VOLUMES_STEP_TYPE = "mount-volumes" - - private val secMgr = new SecurityManager(new SparkConf(false)) - - private val basicFeatureStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - BASIC_STEP_TYPE, classOf[BasicExecutorFeatureStep]) - private val mountSecretsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - SECRETS_STEP_TYPE, classOf[MountSecretsFeatureStep]) - private val envSecretsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - ENV_SECRETS_STEP_TYPE, classOf[EnvSecretsFeatureStep]) - private val localDirsStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - LOCAL_DIRS_STEP_TYPE, classOf[LocalDirsFeatureStep]) - private val hadoopConfStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - HADOOP_CONF_STEP_TYPE, classOf[HadoopConfExecutorFeatureStep]) - private val hadoopSparkUser = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - HADOOP_SPARK_USER_STEP_TYPE, classOf[HadoopSparkUserExecutorFeatureStep]) - private val kerberosConf = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - KERBEROS_CONF_STEP_TYPE, classOf[KerberosConfExecutorFeatureStep]) - private val mountVolumesStep = KubernetesFeaturesTestUtils.getMockConfigStepForStepType( - MOUNT_VOLUMES_STEP_TYPE, classOf[MountVolumesFeatureStep]) +import org.apache.spark.internal.config.ConfigEntry - private val builderUnderTest = new KubernetesExecutorBuilder( - (_, _) => basicFeatureStep, - _ => mountSecretsStep, - _ => envSecretsStep, - _ => localDirsStep, - _ => mountVolumesStep, - _ => hadoopConfStep, - _ => kerberosConf, - _ => hadoopSparkUser) - - test("Basic steps are consistently applied.") { - val conf = KubernetesTestConf.createExecutorConf() - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf, secMgr), BASIC_STEP_TYPE, LOCAL_DIRS_STEP_TYPE) - } - - test("Apply secrets step if secrets are present.") { - val conf = KubernetesTestConf.createExecutorConf( - secretEnvNamesToKeyRefs = Map("secret-name" -> "secret-key"), - secretNamesToMountPaths = Map("secret" -> "secretMountPath")) - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf, secMgr), - BASIC_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, - SECRETS_STEP_TYPE, - ENV_SECRETS_STEP_TYPE) - } +class KubernetesExecutorBuilderSuite extends PodBuilderSuite { - test("Apply volumes step if mounts are present.") { - val volumeSpec = KubernetesVolumeSpec( - "volume", - "/tmp", - "", - false, - KubernetesHostPathVolumeConf("/checkpoint")) - val conf = KubernetesTestConf.createExecutorConf( - volumes = Seq(volumeSpec)) - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf, secMgr), - BASIC_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, - MOUNT_VOLUMES_STEP_TYPE) + override protected def templateFileConf: ConfigEntry[_] = { + Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE } - test("Apply 
basicHadoop step if HADOOP_CONF_DIR is defined") { - // HADOOP_DELEGATION_TOKEN - val conf = KubernetesTestConf.createExecutorConf( - sparkConf = new SparkConfWithEnv(Map("HADOOP_CONF_DIR" -> "/var/hadoop-conf")) - .set(HADOOP_CONFIG_MAP_NAME, "hadoop-conf-map-name") - .set(KRB5_CONFIG_MAP_NAME, "krb5-conf-map-name")) - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf, secMgr), - BASIC_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, - HADOOP_CONF_STEP_TYPE, - HADOOP_SPARK_USER_STEP_TYPE) + override protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod = { + sparkConf.set("spark.driver.host", "https://driver.host.com") + val conf = KubernetesTestConf.createExecutorConf(sparkConf = sparkConf) + val secMgr = new SecurityManager(sparkConf) + new KubernetesExecutorBuilder().buildFromFeatures(conf, secMgr, client) } - test("Apply kerberos step if DT secrets created") { - val conf = KubernetesTestConf.createExecutorConf( - sparkConf = new SparkConf(false) - .set(HADOOP_CONFIG_MAP_NAME, "hadoop-conf-map-name") - .set(KRB5_CONFIG_MAP_NAME, "krb5-conf-map-name") - .set(KERBEROS_SPARK_USER_NAME, "spark-user") - .set(KERBEROS_DT_SECRET_NAME, "dt-secret") - .set(KERBEROS_DT_SECRET_KEY, "dt-key" )) - validateStepTypesApplied( - builderUnderTest.buildFromFeatures(conf, secMgr), - BASIC_STEP_TYPE, - LOCAL_DIRS_STEP_TYPE, - HADOOP_CONF_STEP_TYPE, - KERBEROS_CONF_STEP_TYPE) - } - - private def validateStepTypesApplied(resolvedPod: SparkPod, stepTypes: String*): Unit = { - assert(resolvedPod.pod.getMetadata.getLabels.asScala.keys.toSet === stepTypes.toSet) - } - - test("Starts with empty executor pod if template is not specified") { - val kubernetesClient = mock(classOf[KubernetesClient]) - val executorBuilder = KubernetesExecutorBuilder.apply(kubernetesClient, new SparkConf()) - verify(kubernetesClient, never()).pods() - } - - test("Starts with executor template if specified") { - val kubernetesClient = PodBuilderSuiteUtils.loadingMockKubernetesClient() - val sparkConf = new SparkConf(false) - .set("spark.driver.host", "https://driver.host.com") - .set(Config.CONTAINER_IMAGE, "spark-executor:latest") - .set(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE, "template-file.yaml") - val kubernetesConf = KubernetesTestConf.createExecutorConf( - sparkConf = sparkConf, - driverPod = Some(new PodBuilder() - .withNewMetadata() - .withName("driver") - .endMetadata() - .build())) - val sparkPod = KubernetesExecutorBuilder(kubernetesClient, sparkConf) - .buildFromFeatures(kubernetesConf, secMgr) - PodBuilderSuiteUtils.verifyPodWithSupportedFeatures(sparkPod) - } } From 2920438c43ade38e62442b0ba8937b716a05f7ad Mon Sep 17 00:00:00 2001 From: Luca Canali Date: Wed, 12 Dec 2018 16:18:22 -0800 Subject: [PATCH 0120/1072] [SPARK-25277][YARN] YARN applicationMaster metrics should not register static metrics ## What changes were proposed in this pull request? YARN applicationMaster metrics registration introduced in SPARK-24594 causes further registration of static metrics (Codegenerator and HiveExternalCatalog) and of JVM metrics, which I believe do not belong in this context. This looks like an unintended side effect of using the start method of [[MetricsSystem]]. A possible solution proposed here, is to introduce startNoRegisterSources to avoid these additional registrations of static sources and of JVM sources in the case of YARN applicationMaster metrics (this could be useful for other metrics that may be added in the future). ## How was this patch tested? 
Manually tested on a YARN cluster, Closes #22279 from LucaCanali/YarnMetricsRemoveExtraSourceRegistration. Lead-authored-by: Luca Canali Co-authored-by: LucaCanali Signed-off-by: Marcelo Vanzin --- .../scala/org/apache/spark/metrics/MetricsSystem.scala | 8 +++++--- .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala index bb7b434e9a11..301317a79dfc 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala @@ -94,11 +94,13 @@ private[spark] class MetricsSystem private ( metricsConfig.initialize() - def start() { + def start(registerStaticSources: Boolean = true) { require(!running, "Attempting to start a MetricsSystem that is already running") running = true - StaticSources.allSources.foreach(registerSource) - registerSources() + if (registerStaticSources) { + StaticSources.allSources.foreach(registerSource) + registerSources() + } registerSinks() sinks.foreach(_.start) } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index c1f3211bcab2..e46c4f970c4a 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -449,7 +449,8 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends val ms = MetricsSystem.createMetricsSystem("applicationMaster", sparkConf, securityMgr) val prefix = _sparkConf.get(YARN_METRICS_NAMESPACE).getOrElse(appId) ms.registerSource(new ApplicationMasterSource(prefix, allocator)) - ms.start() + // do not register static sources in this case as per SPARK-25277 + ms.start(false) metricsSystem = Some(ms) reporterThread = launchReporterThread() } From 6daa78309460e338dd688cf6cdbd46a12666f72e Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Wed, 12 Dec 2018 16:45:50 -0800 Subject: [PATCH 0121/1072] [SPARK-26322][SS] Add spark.kafka.sasl.token.mechanism to ease delegation token configuration. ## What changes were proposed in this pull request? When Kafka delegation token obtained, SCRAM `sasl.mechanism` has to be configured for authentication. This can be configured on the related source/sink which is inconvenient from user perspective. Such granularity is not required and this configuration can be implemented with one central parameter. In this PR `spark.kafka.sasl.token.mechanism` added to configure this centrally (default: `SCRAM-SHA-512`). ## How was this patch tested? Existing unit tests + on cluster. Closes #23274 from gaborgsomogyi/SPARK-26322. 
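For context, a minimal hedged sketch of how an application might use the new central setting; the session/builder usage below is illustrative and not part of this patch, and the chosen mechanism must still match the Kafka broker configuration:

```
import org.apache.spark.sql.SparkSession

// Illustrative only: the config keys come from this patch and the existing docs,
// the application name and broker address are assumptions.
val spark = SparkSession.builder()
  .appName("kafka-delegation-token-example")
  .config("spark.kafka.bootstrap.servers", "host1:port1")      // enables delegation token retrieval
  .config("spark.kafka.sasl.token.mechanism", "SCRAM-SHA-256") // default is SCRAM-SHA-512
  .getOrCreate()

// With the token available, sources and sinks no longer need a per-query
// "kafka.sasl.mechanism" option.
val df = spark.readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host1:port1")
  .option("subscribe", "topic1")
  .load()
```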
Authored-by: Gabor Somogyi Signed-off-by: Marcelo Vanzin --- .../apache/spark/internal/config/Kafka.scala | 9 ++ .../structured-streaming-kafka-integration.md | 144 +----------------- .../sql/kafka010/KafkaSourceProvider.scala | 15 +- 3 files changed, 21 insertions(+), 147 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala b/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala index 064fc93cb8ed..e91ddd3e9741 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Kafka.scala @@ -79,4 +79,13 @@ private[spark] object Kafka { "For further details please see kafka documentation. Only used to obtain delegation token.") .stringConf .createOptional + + val TOKEN_SASL_MECHANISM = + ConfigBuilder("spark.kafka.sasl.token.mechanism") + .doc("SASL mechanism used for client connections with delegation token. Because SCRAM " + + "login module used for authentication a compatible mechanism has to be set here. " + + "For further details please see kafka documentation (sasl.mechanism). Only used to " + + "authenticate against Kafka broker with delegation token.") + .stringConf + .createWithDefault("SCRAM-SHA-512") } diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index 7040f8da2c61..3d64ec4cb55f 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -642,9 +642,9 @@ This way the application can be configured via Spark parameters and may not need configuration (Spark can use Kafka's dynamic JAAS configuration feature). For further information about delegation tokens, see [Kafka delegation token docs](http://kafka.apache.org/documentation/#security_delegation_token). -The process is initiated by Spark's Kafka delegation token provider. When `spark.kafka.bootstrap.servers`, +The process is initiated by Spark's Kafka delegation token provider. When `spark.kafka.bootstrap.servers` is set, Spark considers the following log in options, in order of preference: -- **JAAS login configuration** +- **JAAS login configuration**, please see example below. - **Keytab file**, such as, ./bin/spark-submit \ @@ -669,144 +669,8 @@ Kafka broker configuration): After obtaining delegation token successfully, Spark distributes it across nodes and renews it accordingly. Delegation token uses `SCRAM` login module for authentication and because of that the appropriate -`sasl.mechanism` has to be configured on source/sink (it must match with Kafka broker configuration): - -
-
-{% highlight scala %} - -// Setting on Kafka Source for Streaming Queries -val df = spark - .readStream - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") - .option("subscribe", "topic1") - .load() -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - .as[(String, String)] - -// Setting on Kafka Source for Batch Queries -val df = spark - .read - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") - .option("subscribe", "topic1") - .load() -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - .as[(String, String)] - -// Setting on Kafka Sink for Streaming Queries -val ds = df - .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - .writeStream - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") - .option("topic", "topic1") - .start() - -// Setting on Kafka Sink for Batch Queries -val ds = df - .selectExpr("topic1", "CAST(key AS STRING)", "CAST(value AS STRING)") - .write - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") - .save() - -{% endhighlight %} -
-
-{% highlight java %} - -// Setting on Kafka Source for Streaming Queries -Dataset df = spark - .readStream() - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") - .option("subscribe", "topic1") - .load(); -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)"); - -// Setting on Kafka Source for Batch Queries -Dataset df = spark - .read() - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") - .option("subscribe", "topic1") - .load(); -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)"); - -// Setting on Kafka Sink for Streaming Queries -StreamingQuery ds = df - .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - .writeStream() - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") - .option("topic", "topic1") - .start(); - -// Setting on Kafka Sink for Batch Queries -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - .write() - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") - .option("topic", "topic1") - .save(); - -{% endhighlight %} -
-
-{% highlight python %} - -// Setting on Kafka Source for Streaming Queries -df = spark \ - .readStream \ - .format("kafka") \ - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") \ - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") \ - .option("subscribe", "topic1") \ - .load() -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - -// Setting on Kafka Source for Batch Queries -df = spark \ - .read \ - .format("kafka") \ - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") \ - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") \ - .option("subscribe", "topic1") \ - .load() -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - -// Setting on Kafka Sink for Streaming Queries -ds = df \ - .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") \ - .writeStream \ - .format("kafka") \ - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") \ - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") \ - .option("topic", "topic1") \ - .start() - -// Setting on Kafka Sink for Batch Queries -df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") \ - .write \ - .format("kafka") \ - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") \ - .option("kafka.sasl.mechanism", "SCRAM-SHA-512") \ - .option("topic", "topic1") \ - .save() - -{% endhighlight %} -
-
+`spark.kafka.sasl.token.mechanism` (default: `SCRAM-SHA-512`) has to be configured. Also, this parameter +must match with Kafka broker configuration. When delegation token is available on an executor it can be overridden with JAAS login configuration. diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index 0ac330435e5c..6a0c2088ac3d 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -30,6 +30,7 @@ import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySe import org.apache.spark.SparkEnv import org.apache.spark.deploy.security.KafkaTokenUtil import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ import org.apache.spark.sql.{AnalysisException, DataFrame, SaveMode, SQLContext} import org.apache.spark.sql.execution.streaming.{Sink, Source} import org.apache.spark.sql.sources._ @@ -501,7 +502,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { // If buffer config is not set, set it to reasonable value to work around // buffer issues (see KAFKA-3135) .setIfUnset(ConsumerConfig.RECEIVE_BUFFER_CONFIG, 65536: java.lang.Integer) - .setTokenJaasConfigIfNeeded() + .setAuthenticationConfigIfNeeded() .build() def kafkaParamsForExecutors( @@ -523,7 +524,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { // If buffer config is not set, set it to reasonable value to work around // buffer issues (see KAFKA-3135) .setIfUnset(ConsumerConfig.RECEIVE_BUFFER_CONFIG, 65536: java.lang.Integer) - .setTokenJaasConfigIfNeeded() + .setAuthenticationConfigIfNeeded() .build() /** @@ -556,7 +557,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { this } - def setTokenJaasConfigIfNeeded(): ConfigUpdater = { + def setAuthenticationConfigIfNeeded(): ConfigUpdater = { // There are multiple possibilities to log in and applied in the following order: // - JVM global security provided -> try to log in with JVM global security configuration // which can be configured for example with 'java.security.auth.login.config'. 
@@ -568,11 +569,11 @@ private[kafka010] object KafkaSourceProvider extends Logging { } else if (KafkaSecurityHelper.isTokenAvailable()) { logDebug("Delegation token detected, using it for login.") val jaasParams = KafkaSecurityHelper.getTokenJaasParams(SparkEnv.get.conf) - val mechanism = kafkaParams - .getOrElse(SaslConfigs.SASL_MECHANISM, SaslConfigs.DEFAULT_SASL_MECHANISM) + set(SaslConfigs.SASL_JAAS_CONFIG, jaasParams) + val mechanism = SparkEnv.get.conf.get(Kafka.TOKEN_SASL_MECHANISM) require(mechanism.startsWith("SCRAM"), "Delegation token works only with SCRAM mechanism.") - set(SaslConfigs.SASL_JAAS_CONFIG, jaasParams) + set(SaslConfigs.SASL_MECHANISM, mechanism) } this } @@ -600,7 +601,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { ConfigUpdater("executor", specifiedKafkaParams) .set(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, serClassName) .set(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, serClassName) - .setTokenJaasConfigIfNeeded() + .setAuthenticationConfigIfNeeded() .build() } From 05b68d5cc92e46bd701cd01b4179cd13397eaf90 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 13 Dec 2018 11:13:15 +0800 Subject: [PATCH 0122/1072] [SPARK-26297][SQL] improve the doc of Distribution/Partitioning ## What changes were proposed in this pull request? Some documents of `Distribution/Partitioning` are stale and misleading, this PR fixes them: 1. `Distribution` never have intra-partition requirement 2. `OrderedDistribution` does not require tuples that share the same value being colocated in the same partition. 3. `RangePartitioning` can provide a weaker guarantee for a prefix of its `ordering` expressions. ## How was this patch tested? comment-only PR. Closes #23249 from cloud-fan/doc. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../plans/physical/partitioning.scala | 54 ++++++++++++------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index cc1a5e835d9c..17e1cb416fc8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -22,13 +22,11 @@ import org.apache.spark.sql.types.{DataType, IntegerType} /** * Specifies how tuples that share common expressions will be distributed when a query is executed - * in parallel on many machines. Distribution can be used to refer to two distinct physical - * properties: - * - Inter-node partitioning of data: In this case the distribution describes how tuples are - * partitioned across physical machines in a cluster. Knowing this property allows some - * operators (e.g., Aggregate) to perform partition local operations instead of global ones. - * - Intra-partition ordering of data: In this case the distribution describes guarantees made - * about how tuples are distributed within a single partition. + * in parallel on many machines. + * + * Distribution here refers to inter-node partitioning of data. That is, it describes how tuples + * are partitioned across physical machines in a cluster. Knowing this property allows some + * operators (e.g., Aggregate) to perform partition local operations instead of global ones. 
*/ sealed trait Distribution { /** @@ -70,9 +68,7 @@ case object AllTuples extends Distribution { /** * Represents data where tuples that share the same values for the `clustering` - * [[Expression Expressions]] will be co-located. Based on the context, this - * can mean such tuples are either co-located in the same partition or they will be contiguous - * within a single partition. + * [[Expression Expressions]] will be co-located in the same partition. */ case class ClusteredDistribution( clustering: Seq[Expression], @@ -118,10 +114,12 @@ case class HashClusteredDistribution( /** * Represents data where tuples have been ordered according to the `ordering` - * [[Expression Expressions]]. This is a strictly stronger guarantee than - * [[ClusteredDistribution]] as an ordering will ensure that tuples that share the - * same value for the ordering expressions are contiguous and will never be split across - * partitions. + * [[Expression Expressions]]. Its requirement is defined as the following: + * - Given any 2 adjacent partitions, all the rows of the second partition must be larger than or + * equal to any row in the first partition, according to the `ordering` expressions. + * + * In other words, this distribution requires the rows to be ordered across partitions, but not + * necessarily within a partition. */ case class OrderedDistribution(ordering: Seq[SortOrder]) extends Distribution { require( @@ -241,12 +239,12 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) /** * Represents a partitioning where rows are split across partitions based on some total ordering of - * the expressions specified in `ordering`. When data is partitioned in this manner the following - * two conditions are guaranteed to hold: - * - All row where the expressions in `ordering` evaluate to the same values will be in the same - * partition. - * - Each partition will have a `min` and `max` row, relative to the given ordering. All rows - * that are in between `min` and `max` in this `ordering` will reside in this partition. + * the expressions specified in `ordering`. When data is partitioned in this manner, it guarantees: + * Given any 2 adjacent partitions, all the rows of the second partition must be larger than any row + * in the first partition, according to the `ordering` expressions. + * + * This is a strictly stronger guarantee than what `OrderedDistribution(ordering)` requires, as + * there is no overlap between partitions. * * This class extends expression primarily so that transformations over expression will descend * into its child. @@ -262,6 +260,22 @@ case class RangePartitioning(ordering: Seq[SortOrder], numPartitions: Int) super.satisfies0(required) || { required match { case OrderedDistribution(requiredOrdering) => + // If `ordering` is a prefix of `requiredOrdering`: + // Let's say `ordering` is [a, b] and `requiredOrdering` is [a, b, c]. According to the + // RangePartitioning definition, any [a, b] in a previous partition must be smaller + // than any [a, b] in the following partition. This also means any [a, b, c] in a + // previous partition must be smaller than any [a, b, c] in the following partition. + // Thus `RangePartitioning(a, b)` satisfies `OrderedDistribution(a, b, c)`. + // + // If `requiredOrdering` is a prefix of `ordering`: + // Let's say `ordering` is [a, b, c] and `requiredOrdering` is [a, b]. 
According to the + // RangePartitioning definition, any [a, b, c] in a previous partition must be smaller + // than any [a, b, c] in the following partition. If there is a [a1, b1] from a previous + // partition which is larger than a [a2, b2] from the following partition, then there + // must be a [a1, b1 c1] larger than [a2, b2, c2], which violates RangePartitioning + // definition. So it's guaranteed that, any [a, b] in a previous partition must not be + // greater(i.e. smaller or equal to) than any [a, b] in the following partition. Thus + // `RangePartitioning(a, b, c)` satisfies `OrderedDistribution(a, b)`. val minSize = Seq(requiredOrdering.size, ordering.size).min requiredOrdering.take(minSize) == ordering.take(minSize) case ClusteredDistribution(requiredClustering, _) => From 3238e3d1c0d9be5c43a72705e18afbbb4c512e15 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 13 Dec 2018 12:50:15 +0800 Subject: [PATCH 0123/1072] [SPARK-26348][SQL][TEST] make sure expression is resolved during test ## What changes were proposed in this pull request? cleanup some tests to make sure expression is resolved during test. ## How was this patch tested? test-only PR Closes #23297 from cloud-fan/test. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/dsl/package.scala | 2 +- .../expressions/ExpressionEvalHelper.scala | 10 ++++---- .../expressions/JsonExpressionsSuite.scala | 11 ++++----- .../catalyst/expressions/PredicateSuite.scala | 23 ++++++------------- .../expressions/StringExpressionsSuite.scala | 7 ++---- 5 files changed, 20 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 176ea823b1fc..151481c80ee9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -136,7 +136,7 @@ package object dsl { implicit def longToLiteral(l: Long): Literal = Literal(l) implicit def floatToLiteral(f: Float): Literal = Literal(f) implicit def doubleToLiteral(d: Double): Literal = Literal(d) - implicit def stringToLiteral(s: String): Literal = Literal(s) + implicit def stringToLiteral(s: String): Literal = Literal.create(s, StringType) implicit def dateToLiteral(d: Date): Literal = Literal(d) implicit def bigDecimalToLiteral(d: BigDecimal): Literal = Literal(d.underlying()) implicit def bigDecimalToLiteral(d: java.math.BigDecimal): Literal = Literal(d) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index b4fd170467d8..1c91adab7137 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -28,7 +28,7 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.serializer.JavaSerializer import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} -import org.apache.spark.sql.catalyst.analysis.{ResolveTimeZone, SimpleAnalyzer} +import org.apache.spark.sql.catalyst.analysis.ResolveTimeZone import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer import 
org.apache.spark.sql.catalyst.plans.PlanTestBase @@ -70,7 +70,9 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa private def prepareEvaluation(expression: Expression): Expression = { val serializer = new JavaSerializer(new SparkConf()).newInstance val resolver = ResolveTimeZone(new SQLConf) - resolver.resolveTimeZones(serializer.deserialize(serializer.serialize(expression))) + val expr = resolver.resolveTimeZones(expression) + assert(expr.resolved) + serializer.deserialize(serializer.serialize(expr)) } protected def checkEvaluation( @@ -296,9 +298,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa expected: Any, inputRow: InternalRow = EmptyRow): Unit = { val plan = Project(Alias(expression, s"Optimized($expression)")() :: Nil, OneRowRelation()) - // We should analyze the plan first, otherwise we possibly optimize an unresolved plan. - val analyzedPlan = SimpleAnalyzer.execute(plan) - val optimizedPlan = SimpleTestOptimizer.execute(analyzedPlan) + val optimizedPlan = SimpleTestOptimizer.execute(plan) checkEvaluationWithoutCodegen(optimizedPlan.expressions.head, expected, inputRow) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 5d60cefc1389..238e6e34b4ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -25,7 +25,7 @@ import org.scalatest.exceptions.TestFailedException import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.errors.TreeNodeException +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.plans.PlanTestBase import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf @@ -694,11 +694,10 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val mapType2 = MapType(IntegerType, CalendarIntervalType) val schema2 = StructType(StructField("a", mapType2) :: Nil) val struct2 = Literal.create(null, schema2) - intercept[TreeNodeException[_]] { - checkEvaluation( - StructsToJson(Map.empty, struct2, gmtId), - null - ) + StructsToJson(Map.empty, struct2, gmtId).checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(msg) => + assert(msg.contains("Unable to convert column a of type calendarinterval to JSON")) + case _ => fail("from_json should not work on interval map value type.") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 0f63717f9daf..3541afcd2144 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -24,6 +24,7 @@ import scala.collection.immutable.HashSet import org.apache.spark.SparkFunSuite import org.apache.spark.sql.RandomDataGenerator import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.encoders.ExamplePointUDT import 
org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData} @@ -231,22 +232,12 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { testWithRandomDataGeneration(structType, nullable) } - // Map types: not supported - for ( - keyType <- atomicTypes; - valueType <- atomicTypes; - nullable <- Seq(true, false)) { - val mapType = MapType(keyType, valueType) - val e = intercept[Exception] { - testWithRandomDataGeneration(mapType, nullable) - } - if (e.getMessage.contains("Code generation of")) { - // If the `value` expression is null, `eval` will be short-circuited. - // Codegen version evaluation will be run then. - assert(e.getMessage.contains("cannot generate equality code for un-comparable type")) - } else { - assert(e.getMessage.contains("Exception evaluating")) - } + // In doesn't support map type and will fail the analyzer. + val map = Literal.create(create_map(1 -> 1), MapType(IntegerType, IntegerType)) + In(map, Seq(map)).checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(msg) => + assert(msg.contains("function in does not support ordering on type map")) + case _ => fail("In should not work on map type") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index aa334e040d5f..e95f2dff231b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -744,16 +744,14 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("ParseUrl") { def checkParseUrl(expected: String, urlStr: String, partToExtract: String): Unit = { - checkEvaluation( - ParseUrl(Seq(Literal(urlStr), Literal(partToExtract))), expected) + checkEvaluation(ParseUrl(Seq(urlStr, partToExtract)), expected) } def checkParseUrlWithKey( expected: String, urlStr: String, partToExtract: String, key: String): Unit = { - checkEvaluation( - ParseUrl(Seq(Literal(urlStr), Literal(partToExtract), Literal(key))), expected) + checkEvaluation(ParseUrl(Seq(urlStr, partToExtract, key)), expected) } checkParseUrl("spark.apache.org", "http://spark.apache.org/path?query=1", "HOST") @@ -798,7 +796,6 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Sentences(nullString, nullString, nullString), null) checkEvaluation(Sentences(nullString, nullString), null) checkEvaluation(Sentences(nullString), null) - checkEvaluation(Sentences(Literal.create(null, NullType)), null) checkEvaluation(Sentences("", nullString, nullString), Seq.empty) checkEvaluation(Sentences("", nullString), Seq.empty) checkEvaluation(Sentences(""), Seq.empty) From 8edae94fa7ec1a1cc2c69e0924da0da85d4aac83 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 13 Dec 2018 13:14:59 +0800 Subject: [PATCH 0124/1072] [SPARK-26355][PYSPARK] Add a workaround for PyArrow 0.11. ## What changes were proposed in this pull request? In PyArrow 0.11, there is a API breaking change. - [ARROW-1949](https://issues.apache.org/jira/browse/ARROW-1949) - [Python/C++] Add option to Array.from_pandas and pyarrow.array to perform unsafe casts. 
This causes test failures in `ScalarPandasUDFTests.test_vectorized_udf_null_(byte|short|int|long)`: ``` File "/Users/ueshin/workspace/apache-spark/spark/python/pyspark/worker.py", line 377, in main process() File "/Users/ueshin/workspace/apache-spark/spark/python/pyspark/worker.py", line 372, in process serializer.dump_stream(func(split_index, iterator), outfile) File "/Users/ueshin/workspace/apache-spark/spark/python/pyspark/serializers.py", line 317, in dump_stream batch = _create_batch(series, self._timezone) File "/Users/ueshin/workspace/apache-spark/spark/python/pyspark/serializers.py", line 286, in _create_batch arrs = [create_array(s, t) for s, t in series] File "/Users/ueshin/workspace/apache-spark/spark/python/pyspark/serializers.py", line 284, in create_array return pa.Array.from_pandas(s, mask=mask, type=t) File "pyarrow/array.pxi", line 474, in pyarrow.lib.Array.from_pandas return array(obj, mask=mask, type=type, safe=safe, from_pandas=True, File "pyarrow/array.pxi", line 169, in pyarrow.lib.array return _ndarray_to_array(values, mask, type, from_pandas, safe, File "pyarrow/array.pxi", line 69, in pyarrow.lib._ndarray_to_array check_status(NdarrayToArrow(pool, values, mask, from_pandas, File "pyarrow/error.pxi", line 81, in pyarrow.lib.check_status raise ArrowInvalid(message) ArrowInvalid: Floating point value truncated ``` We should add a workaround to support PyArrow 0.11. ## How was this patch tested? In my local environment. Closes #23305 from ueshin/issues/SPARK-26355/pyarrow_0.11. Authored-by: Takuya UESHIN Signed-off-by: Hyukjin Kwon --- python/pyspark/serializers.py | 5 ++++- .../pyspark/sql/tests/test_pandas_udf_grouped_map.py | 11 +++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index f3ebd3767a0a..fd4695210fb7 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -281,7 +281,10 @@ def create_array(s, t): # TODO: see ARROW-2432. Remove when the minimum PyArrow version becomes 0.10.0. return pa.Array.from_pandas(s.apply( lambda v: decimal.Decimal('NaN') if v is None else v), mask=mask, type=t) - return pa.Array.from_pandas(s, mask=mask, type=t) + elif LooseVersion(pa.__version__) < LooseVersion("0.11.0"): + # TODO: see ARROW-1949. Remove when the minimum PyArrow version becomes 0.11.0. + return pa.Array.from_pandas(s, mask=mask, type=t) + return pa.Array.from_pandas(s, mask=mask, type=t, safe=False) arrs = [create_array(s, t) for s, t in series] return pa.RecordBatch.from_arrays(arrs, ["_%d" % i for i in xrange(len(arrs))]) diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_map.py b/python/pyspark/sql/tests/test_pandas_udf_grouped_map.py index bfecc071386e..a12c608dff9d 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_udf_grouped_map.py @@ -468,8 +468,15 @@ def invalid_positional_types(pdf): with QuietTest(self.sc): with self.assertRaisesRegexp(Exception, "KeyError: 'id'"): grouped_df.apply(column_name_typo).collect() - with self.assertRaisesRegexp(Exception, "No cast implemented"): - grouped_df.apply(invalid_positional_types).collect() + from distutils.version import LooseVersion + import pyarrow as pa + if LooseVersion(pa.__version__) < LooseVersion("0.11.0"): + # TODO: see ARROW-1949. Remove when the minimum PyArrow version becomes 0.11.0. 
+ with self.assertRaisesRegexp(Exception, "No cast implemented"): + grouped_df.apply(invalid_positional_types).collect() + else: + with self.assertRaisesRegexp(Exception, "an integer is required"): + grouped_df.apply(invalid_positional_types).collect() def test_positional_assignment_conf(self): import pandas as pd From 19b63c560d92a0e30b2dfc523bcce4f1c9daf851 Mon Sep 17 00:00:00 2001 From: Qi Shao Date: Thu, 13 Dec 2018 20:05:49 +0800 Subject: [PATCH 0125/1072] [MINOR][R] Fix indents of sparkR welcome message to be consistent with pyspark and spark-shell ## What changes were proposed in this pull request? 1. Removed empty space at the beginning of welcome message lines of sparkR to be consistent with welcome message of `pyspark` and `spark-shell` 2. Setting indent of logo message lines to 3 to be consistent with welcome message of `pyspark` and `spark-shell` Output of `pyspark`: ``` Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /__ / .__/\_,_/_/ /_/\_\ version 2.4.0 /_/ Using Python version 3.6.6 (default, Jun 28 2018 11:07:29) SparkSession available as 'spark'. ``` Output of `spark-shell`: ``` Spark session available as 'spark'. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 2.4.0 /_/ Using Scala version 2.11.12 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_161) Type in expressions to have them evaluated. Type :help for more information. ``` ## How was this patch tested? Before: Output of `sparkR`: ``` Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 2.4.0 /_/ SparkSession available as 'spark'. ``` After: ``` Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 2.4.0 /_/ SparkSession available as 'spark'. ``` Closes #23293 from AzureQ/master. Authored-by: Qi Shao Signed-off-by: Hyukjin Kwon --- R/pkg/inst/profile/shell.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R index 32eb3671b594..e4e0d032997d 100644 --- a/R/pkg/inst/profile/shell.R +++ b/R/pkg/inst/profile/shell.R @@ -33,19 +33,19 @@ sc <- SparkR:::callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", spark) assign("sc", sc, envir = .GlobalEnv) sparkVer <- SparkR:::callJMethod(sc, "version") - cat("\n Welcome to") + cat("\nWelcome to") cat("\n") - cat(" ____ __", "\n") - cat(" / __/__ ___ _____/ /__", "\n") - cat(" _\\ \\/ _ \\/ _ `/ __/ '_/", "\n") - cat(" /___/ .__/\\_,_/_/ /_/\\_\\") + cat(" ____ __", "\n") + cat(" / __/__ ___ _____/ /__", "\n") + cat(" _\\ \\/ _ \\/ _ `/ __/ '_/", "\n") + cat(" /___/ .__/\\_,_/_/ /_/\\_\\") if (nchar(sparkVer) == 0) { cat("\n") } else { - cat(" version ", sparkVer, "\n") + cat(" version", sparkVer, "\n") } - cat(" /_/", "\n") + cat(" /_/", "\n") cat("\n") - cat("\n SparkSession available as 'spark'.\n") + cat("\nSparkSession available as 'spark'.\n") } From f3726092169406979849b3cb5afeb52be106fd68 Mon Sep 17 00:00:00 2001 From: seancxmao Date: Thu, 13 Dec 2018 07:40:13 -0600 Subject: [PATCH 0126/1072] [MINOR][DOC] Fix comments of ConvertToLocalRelation rule ## What changes were proposed in this pull request? There are some comments issues left when `ConvertToLocalRelation` rule was added (see #22205/[SPARK-25212](https://issues.apache.org/jira/browse/SPARK-25212)). This PR fixes those comments issues. ## How was this patch tested? N/A Closes #23273 from seancxmao/ConvertToLocalRelation-doc. 
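For context, a hedged sketch of the behaviour described by the comments updated below; the rule itself is unchanged by this patch, and the query shown is an assumed example rather than code from the Spark sources:

```
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("local-relation-example").getOrCreate()
import spark.implicits._

// A Filter/Project over an in-memory LocalRelation...
val df = Seq(1, 2, 3).toDF("a").filter($"a" > 1).select(($"a" + 1).as("b"))

// ...is expected to collapse to a single LocalRelation in the optimized plan,
// which is why running ConvertToLocalRelation early keeps the later rules cheap.
df.explain(true)
```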
Authored-by: seancxmao Signed-off-by: Sean Owen --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 8d251eeab848..f615757a837a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -131,11 +131,11 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) // since the other rules might make two separate Unions operators adjacent. Batch("Union", Once, CombineUnions) :: - // run this once earlier. this might simplify the plan and reduce cost of optimizer. - // for example, a query such as Filter(LocalRelation) would go through all the heavy + // Run this once earlier. This might simplify the plan and reduce cost of optimizer. + // For example, a query such as Filter(LocalRelation) would go through all the heavy // optimizer rules that are triggered when there is a filter - // (e.g. InferFiltersFromConstraints). if we run this batch earlier, the query becomes just - // LocalRelation and does not trigger many rules + // (e.g. InferFiltersFromConstraints). If we run this batch earlier, the query becomes just + // LocalRelation and does not trigger many rules. Batch("LocalRelation early", fixedPoint, ConvertToLocalRelation, PropagateEmptyRelation) :: @@ -1370,10 +1370,8 @@ object DecimalAggregates extends Rule[LogicalPlan] { } /** - * Converts local operations (i.e. ones that don't require data exchange) on LocalRelation to - * another LocalRelation. - * - * This is relatively simple as it currently handles only 2 single case: Project and Limit. + * Converts local operations (i.e. ones that don't require data exchange) on `LocalRelation` to + * another `LocalRelation`. */ object ConvertToLocalRelation extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { From f69998ace6b2e0665e45839c3e98c77a18be442a Mon Sep 17 00:00:00 2001 From: lichaoqun Date: Thu, 13 Dec 2018 07:42:17 -0600 Subject: [PATCH 0127/1072] [MINOR][DOC] update the condition description of BypassMergeSortShuffle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? These three condition descriptions should be updated, follow #23228 :
  • no Ordering is specified,
  • no Aggregator is specified, and
  • the number of partitions is less than spark.shuffle.sort.bypassMergeThreshold.
• 1. If the shuffle dependency specifies aggregation but only aggregates on the reduce side, BypassMergeSortShuffle can still be used. 2. If the number of output partitions equals spark.shuffle.sort.bypassMergeThreshold (e.g. 200), BypassMergeSortShuffle can still be used. ## How was this patch tested? N/A Closes #23281 from lcqzte10192193/wid-lcq-1211. Authored-by: lichaoqun Signed-off-by: Sean Owen --- .../spark/shuffle/sort/BypassMergeSortShuffleWriter.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java index fda33cd8293d..997bc9e3f043 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java @@ -58,9 +58,8 @@
 * simultaneously opens separate serializers and file streams for all partitions. As a result,
 * {@link SortShuffleManager} only selects this write path when
 * <ul>
- *   <li>no Ordering is specified,</li>
- *   <li>no Aggregator is specified, and</li>
- *   <li>the number of partitions is less than
+ *   <li>no map-side combine is specified, and</li>
+ *   <li>the number of partitions is less than or equal to
 *   <code>spark.shuffle.sort.bypassMergeThreshold</code>.</li>
 * </ul>
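To make the updated conditions concrete, a hedged RDD-level sketch (an assumed example, not part of the patch): a shuffle with no map-side combine and at most spark.shuffle.sort.bypassMergeThreshold output partitions is eligible for the bypass write path, while one with map-side combine is not.

```
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("bypass-merge-sort-example").getOrCreate()
val sc = spark.sparkContext

val pairs = sc.parallelize(1 to 1000).map(i => (i % 50, i))

// groupByKey performs no map-side combine; with 50 output partitions
// (<= 200, the default spark.shuffle.sort.bypassMergeThreshold) the bypass
// write path can be selected for this shuffle.
pairs.groupByKey(50).count()

// reduceByKey uses map-side combine, so the bypass path is not selected
// even though the partition count is small.
pairs.reduceByKey(_ + _, 50).count()
```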
    * From 29b3eb6fedd8f90495046da598eacc4ac00944c3 Mon Sep 17 00:00:00 2001 From: "n.fraison" Date: Thu, 13 Dec 2018 08:34:47 -0600 Subject: [PATCH 0128/1072] [SPARK-26340][CORE] Ensure cores per executor is greater than cpu per task Currently this check is only performed for dynamic allocation use case in ExecutorAllocationManager. ## What changes were proposed in this pull request? Checks that cpu per task is lower than number of cores per executor otherwise throw an exception ## How was this patch tested? manual tests Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23290 from ashangit/master. Authored-by: n.fraison Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/SparkConf.scala | 9 +++++++++ .../src/test/scala/org/apache/spark/SparkConfSuite.scala | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 21c5cbc04d81..8d135d3e083d 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -605,6 +605,15 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria } } + if (contains("spark.executor.cores") && contains("spark.task.cpus")) { + val executorCores = getInt("spark.executor.cores", 1) + val taskCpus = getInt("spark.task.cpus", 1) + + if (executorCores < taskCpus) { + throw new SparkException("spark.executor.cores must not be less than spark.task.cpus.") + } + } + val encryptionEnabled = get(NETWORK_ENCRYPTION_ENABLED) || get(SASL_ENCRYPTION_ENABLED) require(!encryptionEnabled || get(NETWORK_AUTH_ENABLED), s"${NETWORK_AUTH_ENABLED.key} must be enabled when enabling encryption.") diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala index df274d949bae..7cb03deae139 100644 --- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala @@ -138,6 +138,13 @@ class SparkConfSuite extends SparkFunSuite with LocalSparkContext with ResetSyst assert(sc.appName === "My other app") } + test("creating SparkContext with cpus per tasks bigger than cores per executors") { + val conf = new SparkConf(false) + .set("spark.executor.cores", "1") + .set("spark.task.cpus", "2") + intercept[SparkException] { sc = new SparkContext(conf) } + } + test("nested property names") { // This wasn't supported by some external conf parsing libraries System.setProperty("spark.test.a", "a") From 6c1f7ba8f627a69cac74f11400066dd9871d9102 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 13 Dec 2018 23:03:26 +0800 Subject: [PATCH 0129/1072] [SPARK-26313][SQL] move `newScanBuilder` from Table to read related mix-in traits ## What changes were proposed in this pull request? As discussed in https://github.com/apache/spark/pull/23208/files#r239684490 , we should put `newScanBuilder` in read related mix-in traits like `SupportsBatchRead`, to support write-only table. In the `Append` operator, we should skip schema validation if not necessary. In the future we would introduce a capability API, so that data source can tell Spark that it doesn't want to do validation. ## How was this patch tested? existing tests. Closes #23266 from cloud-fan/ds-read. 
Authored-by: Wenchen Fan Signed-off-by: Hyukjin Kwon --- .../sql/sources/v2/SupportsBatchRead.java | 8 ++--- .../spark/sql/sources/v2/SupportsRead.java | 35 +++++++++++++++++++ .../apache/spark/sql/sources/v2/Table.java | 15 ++------ 3 files changed, 41 insertions(+), 17 deletions(-) create mode 100644 sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java index 0df89dbb608a..6c5a95d2a75b 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsBatchRead.java @@ -24,10 +24,10 @@ /** * An empty mix-in interface for {@link Table}, to indicate this table supports batch scan. *

    - * If a {@link Table} implements this interface, its {@link Table#newScanBuilder(DataSourceOptions)} - * must return a {@link ScanBuilder} that builds {@link Scan} with {@link Scan#toBatch()} - * implemented. + * If a {@link Table} implements this interface, the + * {@link SupportsRead#newScanBuilder(DataSourceOptions)} must return a {@link ScanBuilder} that + * builds {@link Scan} with {@link Scan#toBatch()} implemented. *

    */ @Evolving -public interface SupportsBatchRead extends Table { } +public interface SupportsBatchRead extends SupportsRead { } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java new file mode 100644 index 000000000000..e22738d20d50 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2; + +import org.apache.spark.sql.sources.v2.reader.Scan; +import org.apache.spark.sql.sources.v2.reader.ScanBuilder; + +/** + * An internal base interface of mix-in interfaces for readable {@link Table}. This adds + * {@link #newScanBuilder(DataSourceOptions)} that is used to create a scan for batch, micro-batch, + * or continuous processing. + */ +interface SupportsRead extends Table { + + /** + * Returns a {@link ScanBuilder} which can be used to build a {@link Scan}. Spark will call this + * method to configure each scan. + */ + ScanBuilder newScanBuilder(DataSourceOptions options); +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java index 0c65fe0f9e76..08664859b8de 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/Table.java @@ -18,8 +18,6 @@ package org.apache.spark.sql.sources.v2; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.sources.v2.reader.Scan; -import org.apache.spark.sql.sources.v2.reader.ScanBuilder; import org.apache.spark.sql.types.StructType; /** @@ -43,17 +41,8 @@ public interface Table { String name(); /** - * Returns the schema of this table. + * Returns the schema of this table. If the table is not readable and doesn't have a schema, an + * empty schema can be returned here. */ StructType schema(); - - /** - * Returns a {@link ScanBuilder} which can be used to build a {@link Scan} later. Spark will call - * this method for each data scanning query. - *

    - * The builder can take some query specific information to do operators pushdown, and keep these - * information in the created {@link Scan}. - *

    - */ - ScanBuilder newScanBuilder(DataSourceOptions options); } From 524d1be6d2920674eb871b5f0f25e7496a374090 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 13 Dec 2018 09:07:33 -0800 Subject: [PATCH 0130/1072] [SPARK-26098][WEBUI] Show associated SQL query in Job page ## What changes were proposed in this pull request? For jobs associated to SQL queries, it would be easier to understand the context to showing the SQL query in Job detail page. Before code change, it is hard to tell what the job is about from the job page: ![image](https://user-images.githubusercontent.com/1097932/48659359-96baa180-ea8a-11e8-8419-a0a87c3f30fc.png) After code change: ![image](https://user-images.githubusercontent.com/1097932/48659390-26f8e680-ea8b-11e8-8fdd-3b58909ea364.png) After navigating to the associated SQL detail page, We can see the whole context : ![image](https://user-images.githubusercontent.com/1097932/48659463-9fac7280-ea8c-11e8-9dfe-244e849f72a5.png) **For Jobs don't have associated SQL query, the text won't be shown.** ## How was this patch tested? Manual test Closes #23068 from gengliangwang/addSQLID. Authored-by: Gengliang Wang Signed-off-by: gatorsmile --- .../apache/spark/status/AppStatusListener.scala | 7 ++++++- .../org/apache/spark/status/AppStatusStore.scala | 7 +++++++ .../scala/org/apache/spark/status/LiveEntity.scala | 5 +++-- .../scala/org/apache/spark/status/storeTypes.scala | 3 ++- .../scala/org/apache/spark/ui/jobs/JobPage.scala | 14 +++++++++++++- 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index bd3f58b6182c..262ff6547faa 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -70,6 +70,8 @@ private[spark] class AppStatusListener( private val liveTasks = new HashMap[Long, LiveTask]() private val liveRDDs = new HashMap[Int, LiveRDD]() private val pools = new HashMap[String, SchedulerPool]() + + private val SQL_EXECUTION_ID_KEY = "spark.sql.execution.id" // Keep the active executor count as a separate variable to avoid having to do synchronization // around liveExecutors. @volatile private var activeExecutorCount = 0 @@ -318,6 +320,8 @@ private[spark] class AppStatusListener( val lastStageName = lastStageInfo.map(_.name).getOrElse("(Unknown Stage Name)") val jobGroup = Option(event.properties) .flatMap { p => Option(p.getProperty(SparkContext.SPARK_JOB_GROUP_ID)) } + val sqlExecutionId = Option(event.properties) + .flatMap(p => Option(p.getProperty(SQL_EXECUTION_ID_KEY)).map(_.toLong)) val job = new LiveJob( event.jobId, @@ -325,7 +329,8 @@ private[spark] class AppStatusListener( if (event.time > 0) Some(new Date(event.time)) else None, event.stageIds, jobGroup, - numTasks) + numTasks, + sqlExecutionId) liveJobs.put(event.jobId, job) liveUpdate(job, now) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index b35781cb36e8..312bcccb1cca 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -56,6 +56,13 @@ private[spark] class AppStatusStore( store.read(classOf[JobDataWrapper], jobId).info } + // Returns job data and associated SQL execution ID of certain Job ID. 
+ // If there is no related SQL execution, the SQL execution ID part will be None. + def jobWithAssociatedSql(jobId: Int): (v1.JobData, Option[Long]) = { + val data = store.read(classOf[JobDataWrapper], jobId) + (data.info, data.sqlExecutionId) + } + def executorList(activeOnly: Boolean): Seq[v1.ExecutorSummary] = { val base = store.view(classOf[ExecutorSummaryWrapper]) val filtered = if (activeOnly) { diff --git a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala index 47e45a66eccc..7f7b83a54d79 100644 --- a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala +++ b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala @@ -64,7 +64,8 @@ private class LiveJob( val submissionTime: Option[Date], val stageIds: Seq[Int], jobGroup: Option[String], - numTasks: Int) extends LiveEntity { + numTasks: Int, + sqlExecutionId: Option[Long]) extends LiveEntity { var activeTasks = 0 var completedTasks = 0 @@ -108,7 +109,7 @@ private class LiveJob( skippedStages.size, failedStages, killedSummary) - new JobDataWrapper(info, skippedStages) + new JobDataWrapper(info, skippedStages, sqlExecutionId) } } diff --git a/core/src/main/scala/org/apache/spark/status/storeTypes.scala b/core/src/main/scala/org/apache/spark/status/storeTypes.scala index ef19e86f3135..eea47b3b1709 100644 --- a/core/src/main/scala/org/apache/spark/status/storeTypes.scala +++ b/core/src/main/scala/org/apache/spark/status/storeTypes.scala @@ -68,7 +68,8 @@ private[spark] class ExecutorSummaryWrapper(val info: ExecutorSummary) { */ private[spark] class JobDataWrapper( val info: JobData, - val skippedStages: Set[Int]) { + val skippedStages: Set[Int], + val sqlExecutionId: Option[Long]) { @JsonIgnore @KVIndex private def id: Int = info.jobId diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index 55444a2c0c9a..b58a6ca447ed 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -189,7 +189,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") val jobId = parameterId.toInt - val jobData = store.asOption(store.job(jobId)).getOrElse { + val (jobData, sqlExecutionId) = store.asOption(store.jobWithAssociatedSql(jobId)).getOrElse { val content =

    No information to display for job {jobId}

    @@ -197,6 +197,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP return UIUtils.headerSparkPage( request, s"Details for Job $jobId", content, parent) } + val isComplete = jobData.status != JobExecutionStatus.RUNNING val stages = jobData.stageIds.map { stageId => // This could be empty if the listener hasn't received information about the @@ -278,6 +279,17 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP Status: {jobData.status} + { + if (sqlExecutionId.isDefined) { +
<li> + <strong>Associated SQL Query: </strong> + {<a href={/* SQL execution page URL; expression lost in extraction */}>{sqlExecutionId.get}</a>} +
</li> + } + } { if (jobData.jobGroup.isDefined) {
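A minimal sketch of how the pair returned by the new `AppStatusStore.jobWithAssociatedSql` could be consumed. The method is `private[spark]`, so the `store` value and the job id below are assumed to come from Spark-internal code such as the page above.

```scala
// Sketch only: assumes a Spark-internal AppStatusStore named `store` and a job id
// that is present in the store (jobWithAssociatedSql is private[spark] API).
val (jobData, sqlExecutionId) = store.jobWithAssociatedSql(3)
sqlExecutionId match {
  case Some(id) => println(s"Job ${jobData.jobId} was launched by SQL execution $id")
  case None     => println(s"Job ${jobData.jobId} has no associated SQL query")
}
```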
  • From 362e472831e0609f88fdeb01d8e14badc812b0f4 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Thu, 13 Dec 2018 16:12:55 -0800 Subject: [PATCH 0131/1072] [SPARK-23886][SS] Update query status for ContinuousExecution ## What changes were proposed in this pull request? Added query status updates to ContinuousExecution. ## How was this patch tested? Existing unit tests + added ContinuousQueryStatusAndProgressSuite. Closes #23095 from gaborgsomogyi/SPARK-23886. Authored-by: Gabor Somogyi Signed-off-by: Marcelo Vanzin --- .../streaming/MicroBatchExecution.scala | 6 ++ .../streaming/ProgressReporter.scala | 1 - .../continuous/ContinuousExecution.scala | 6 ++ .../sql/streaming/StreamingQueryStatus.scala | 6 +- ...ontinuousQueryStatusAndProgressSuite.scala | 55 +++++++++++++++++++ 5 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueryStatusAndProgressSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 64e09edf27f5..03beefeca269 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -147,6 +147,12 @@ class MicroBatchExecution( logInfo(s"Query $prettyIdString was stopped") } + /** Begins recording statistics about query progress for a given trigger. */ + override protected def startTrigger(): Unit = { + super.startTrigger() + currentStatus = currentStatus.copy(isTriggerActive = true) + } + /** * Repeatedly attempts to run batches as data arrives. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index 6a22f0cc8431..39ab702ee083 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -114,7 +114,6 @@ trait ProgressReporter extends Logging { logDebug("Starting Trigger Calculation") lastTriggerStartTimestamp = currentTriggerStartTimestamp currentTriggerStartTimestamp = triggerClock.getTimeMillis() - currentStatus = currentStatus.copy(isTriggerActive = true) currentTriggerStartOffsets = null currentTriggerEndOffsets = null currentDurationsMs.clear() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 4d42428fd189..f0859aaaa304 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -118,6 +118,8 @@ class ContinuousExecution( // For at least once, we can just ignore those reports and risk duplicates. 
commitLog.getLatest() match { case Some((latestEpochId, _)) => + updateStatusMessage("Starting new streaming query " + + s"and getting offsets from latest epoch $latestEpochId") val nextOffsets = offsetLog.get(latestEpochId).getOrElse { throw new IllegalStateException( s"Batch $latestEpochId was committed without end epoch offsets!") @@ -129,6 +131,7 @@ class ContinuousExecution( nextOffsets case None => // We are starting this stream for the first time. Offsets are all None. + updateStatusMessage("Starting new streaming query") logInfo(s"Starting new streaming query.") currentBatchId = 0 OffsetSeq.fill(continuousSources.map(_ => null): _*) @@ -263,6 +266,7 @@ class ContinuousExecution( epochUpdateThread.setDaemon(true) epochUpdateThread.start() + updateStatusMessage("Running") reportTimeTaken("runContinuous") { SQLExecution.withNewExecutionId( sparkSessionForQuery, lastExecution) { @@ -322,6 +326,8 @@ class ContinuousExecution( * before this is called. */ def commit(epoch: Long): Unit = { + updateStatusMessage(s"Committing epoch $epoch") + assert(continuousSources.length == 1, "only one continuous source supported currently") assert(offsetLog.get(epoch).isDefined, s"offset for epoch $epoch not reported before commit") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala index 9dc62b7aac89..6ca9aacab724 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala @@ -28,9 +28,11 @@ import org.apache.spark.annotation.Evolving * Reports information about the instantaneous status of a streaming query. * * @param message A human readable description of what the stream is currently doing. - * @param isDataAvailable True when there is new data to be processed. + * @param isDataAvailable True when there is new data to be processed. Doesn't apply + * to ContinuousExecution where it is always false. * @param isTriggerActive True when the trigger is actively firing, false when waiting for the - * next trigger time. + * next trigger time. Doesn't apply to ContinuousExecution where it is + * always false. * * @since 2.1.0 */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueryStatusAndProgressSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueryStatusAndProgressSuite.scala new file mode 100644 index 000000000000..10bea7f09057 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueryStatusAndProgressSuite.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.streaming.continuous + +import org.apache.spark.sql.execution.streaming.StreamExecution +import org.apache.spark.sql.execution.streaming.sources.ContinuousMemoryStream +import org.apache.spark.sql.streaming.Trigger + +class ContinuousQueryStatusAndProgressSuite extends ContinuousSuiteBase { + test("StreamingQueryStatus - ContinuousExecution isDataAvailable and isTriggerActive " + + "should be false") { + import testImplicits._ + + val input = ContinuousMemoryStream[Int] + + def assertStatus(stream: StreamExecution): Unit = { + assert(stream.status.isDataAvailable === false) + assert(stream.status.isTriggerActive === false) + } + + val trigger = Trigger.Continuous(100) + testStream(input.toDF(), useV2Sink = true)( + StartStream(trigger), + Execute(assertStatus), + AddData(input, 0, 1, 2), + Execute(assertStatus), + CheckAnswer(0, 1, 2), + Execute(assertStatus), + StopStream, + Execute(assertStatus), + AddData(input, 3, 4, 5), + Execute(assertStatus), + StartStream(trigger), + Execute(assertStatus), + CheckAnswer(0, 1, 2, 3, 4, 5), + Execute(assertStatus), + StopStream, + Execute(assertStatus)) + } +} From 160e583a17235318c06b95992941a772ff782fae Mon Sep 17 00:00:00 2001 From: Li Jin Date: Fri, 14 Dec 2018 10:45:24 +0800 Subject: [PATCH 0132/1072] [SPARK-26364][PYTHON][TESTING] Clean up imports in test_pandas_udf* ## What changes were proposed in this pull request? Clean up unconditional import statements and move them to the top. Conditional imports (pandas, numpy, pyarrow) are left as-is. ## How was this patch tested? Exising tests. Closes #23314 from icexelloss/clean-up-test-imports. Authored-by: Li Jin Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/tests/test_pandas_udf.py | 16 +--- .../sql/tests/test_pandas_udf_grouped_agg.py | 39 +--------- .../sql/tests/test_pandas_udf_grouped_map.py | 40 +++------- .../sql/tests/test_pandas_udf_scalar.py | 75 +++++-------------- .../sql/tests/test_pandas_udf_window.py | 29 +------ 5 files changed, 36 insertions(+), 163 deletions(-) diff --git a/python/pyspark/sql/tests/test_pandas_udf.py b/python/pyspark/sql/tests/test_pandas_udf.py index c4b5478a7e89..d4d9679649ee 100644 --- a/python/pyspark/sql/tests/test_pandas_udf.py +++ b/python/pyspark/sql/tests/test_pandas_udf.py @@ -17,12 +17,16 @@ import unittest +from pyspark.sql.functions import udf, pandas_udf, PandasUDFType from pyspark.sql.types import * from pyspark.sql.utils import ParseException +from pyspark.rdd import PythonEvalType from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message from pyspark.testing.utils import QuietTest +from py4j.protocol import Py4JJavaError + @unittest.skipIf( not have_pandas or not have_pyarrow, @@ -30,9 +34,6 @@ class PandasUDFTests(ReusedSQLTestCase): def test_pandas_udf_basic(self): - from pyspark.rdd import PythonEvalType - from pyspark.sql.functions import pandas_udf, PandasUDFType - udf = pandas_udf(lambda x: x, DoubleType()) self.assertEqual(udf.returnType, DoubleType()) self.assertEqual(udf.evalType, PythonEvalType.SQL_SCALAR_PANDAS_UDF) @@ -65,10 +66,6 @@ def test_pandas_udf_basic(self): self.assertEqual(udf.evalType, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF) def test_pandas_udf_decorator(self): - from pyspark.rdd import PythonEvalType - from pyspark.sql.functions import pandas_udf, PandasUDFType - from pyspark.sql.types import StructType, StructField, DoubleType - @pandas_udf(DoubleType()) def foo(x): return x @@ -114,8 +111,6 @@ 
def foo(x): self.assertEqual(foo.evalType, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF) def test_udf_wrong_arg(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - with QuietTest(self.sc): with self.assertRaises(ParseException): @pandas_udf('blah') @@ -151,9 +146,6 @@ def foo(k, v, w): return k def test_stopiteration_in_udf(self): - from pyspark.sql.functions import udf, pandas_udf, PandasUDFType - from py4j.protocol import Py4JJavaError - def foo(x): raise StopIteration() diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py index 5383704434c8..18264ead2fd0 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py @@ -17,6 +17,9 @@ import unittest +from pyspark.rdd import PythonEvalType +from pyspark.sql.functions import array, explode, col, lit, mean, sum, \ + udf, pandas_udf, PandasUDFType from pyspark.sql.types import * from pyspark.sql.utils import AnalysisException from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ @@ -31,7 +34,6 @@ class GroupedAggPandasUDFTests(ReusedSQLTestCase): @property def data(self): - from pyspark.sql.functions import array, explode, col, lit return self.spark.range(10).toDF('id') \ .withColumn("vs", array([lit(i * 1.0) + col('id') for i in range(20, 30)])) \ .withColumn("v", explode(col('vs'))) \ @@ -40,8 +42,6 @@ def data(self): @property def python_plus_one(self): - from pyspark.sql.functions import udf - @udf('double') def plus_one(v): assert isinstance(v, (int, float)) @@ -51,7 +51,6 @@ def plus_one(v): @property def pandas_scalar_plus_two(self): import pandas as pd - from pyspark.sql.functions import pandas_udf, PandasUDFType @pandas_udf('double', PandasUDFType.SCALAR) def plus_two(v): @@ -61,8 +60,6 @@ def plus_two(v): @property def pandas_agg_mean_udf(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - @pandas_udf('double', PandasUDFType.GROUPED_AGG) def avg(v): return v.mean() @@ -70,8 +67,6 @@ def avg(v): @property def pandas_agg_sum_udf(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - @pandas_udf('double', PandasUDFType.GROUPED_AGG) def sum(v): return v.sum() @@ -80,7 +75,6 @@ def sum(v): @property def pandas_agg_weighted_mean_udf(self): import numpy as np - from pyspark.sql.functions import pandas_udf, PandasUDFType @pandas_udf('double', PandasUDFType.GROUPED_AGG) def weighted_mean(v, w): @@ -88,8 +82,6 @@ def weighted_mean(v, w): return weighted_mean def test_manual(self): - from pyspark.sql.functions import pandas_udf, array - df = self.data sum_udf = self.pandas_agg_sum_udf mean_udf = self.pandas_agg_mean_udf @@ -118,8 +110,6 @@ def test_manual(self): self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) def test_basic(self): - from pyspark.sql.functions import col, lit, mean - df = self.data weighted_mean_udf = self.pandas_agg_weighted_mean_udf @@ -150,9 +140,6 @@ def test_basic(self): self.assertPandasEqual(expected4.toPandas(), result4.toPandas()) def test_unsupported_types(self): - from pyspark.sql.types import DoubleType, MapType - from pyspark.sql.functions import pandas_udf, PandasUDFType - with QuietTest(self.sc): with self.assertRaisesRegexp(NotImplementedError, 'not supported'): pandas_udf( @@ -173,8 +160,6 @@ def mean_and_std_udf(v): return {v.mean(): v.std()} def test_alias(self): - from pyspark.sql.functions import mean - df = self.data mean_udf = self.pandas_agg_mean_udf @@ -187,8 
+172,6 @@ def test_mixed_sql(self): """ Test mixing group aggregate pandas UDF with sql expression. """ - from pyspark.sql.functions import sum - df = self.data sum_udf = self.pandas_agg_sum_udf @@ -225,8 +208,6 @@ def test_mixed_udfs(self): """ Test mixing group aggregate pandas UDF with python UDF and scalar pandas UDF. """ - from pyspark.sql.functions import sum - df = self.data plus_one = self.python_plus_one plus_two = self.pandas_scalar_plus_two @@ -292,8 +273,6 @@ def test_multiple_udfs(self): """ Test multiple group aggregate pandas UDFs in one agg function. """ - from pyspark.sql.functions import sum, mean - df = self.data mean_udf = self.pandas_agg_mean_udf sum_udf = self.pandas_agg_sum_udf @@ -315,8 +294,6 @@ def test_multiple_udfs(self): self.assertPandasEqual(expected1, result1) def test_complex_groupby(self): - from pyspark.sql.functions import sum - df = self.data sum_udf = self.pandas_agg_sum_udf plus_one = self.python_plus_one @@ -359,8 +336,6 @@ def test_complex_groupby(self): self.assertPandasEqual(expected7.toPandas(), result7.toPandas()) def test_complex_expressions(self): - from pyspark.sql.functions import col, sum - df = self.data plus_one = self.python_plus_one plus_two = self.pandas_scalar_plus_two @@ -434,7 +409,6 @@ def test_complex_expressions(self): self.assertPandasEqual(expected3, result3) def test_retain_group_columns(self): - from pyspark.sql.functions import sum with self.sql_conf({"spark.sql.retainGroupColumns": False}): df = self.data sum_udf = self.pandas_agg_sum_udf @@ -444,8 +418,6 @@ def test_retain_group_columns(self): self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) def test_array_type(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - df = self.data array_udf = pandas_udf(lambda x: [1.0, 2.0], 'array', PandasUDFType.GROUPED_AGG) @@ -453,8 +425,6 @@ def test_array_type(self): self.assertEquals(result1.first()['v2'], [1.0, 2.0]) def test_invalid_args(self): - from pyspark.sql.functions import mean - df = self.data plus_one = self.python_plus_one mean_udf = self.pandas_agg_mean_udf @@ -478,9 +448,6 @@ def test_invalid_args(self): df.groupby(df.id).agg(mean_udf(df.v), mean(df.v)).collect() def test_register_vectorized_udf_basic(self): - from pyspark.sql.functions import pandas_udf - from pyspark.rdd import PythonEvalType - sum_pandas_udf = pandas_udf( lambda v: v.sum(), "integer", PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF) diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_map.py b/python/pyspark/sql/tests/test_pandas_udf_grouped_map.py index a12c608dff9d..80e70349b78d 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_udf_grouped_map.py @@ -18,7 +18,12 @@ import datetime import unittest +from collections import OrderedDict +from decimal import Decimal +from distutils.version import LooseVersion + from pyspark.sql import Row +from pyspark.sql.functions import array, explode, col, lit, udf, sum, pandas_udf, PandasUDFType from pyspark.sql.types import * from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message @@ -32,16 +37,12 @@ class GroupedMapPandasUDFTests(ReusedSQLTestCase): @property def data(self): - from pyspark.sql.functions import array, explode, col, lit return self.spark.range(10).toDF('id') \ .withColumn("vs", array([lit(i) for i in range(20, 30)])) \ .withColumn("v", explode(col('vs'))).drop('vs') def test_supported_types(self): - from decimal import 
Decimal - from distutils.version import LooseVersion import pyarrow as pa - from pyspark.sql.functions import pandas_udf, PandasUDFType values = [ 1, 2, 3, @@ -131,8 +132,6 @@ def test_supported_types(self): self.assertPandasEqual(expected3, result3) def test_array_type_correct(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType, array, col - df = self.data.withColumn("arr", array(col("id"))).repartition(1, "id") output_schema = StructType( @@ -151,8 +150,6 @@ def test_array_type_correct(self): self.assertPandasEqual(expected, result) def test_register_grouped_map_udf(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - foo_udf = pandas_udf(lambda x: x, "id long", PandasUDFType.GROUPED_MAP) with QuietTest(self.sc): with self.assertRaisesRegexp( @@ -161,7 +158,6 @@ def test_register_grouped_map_udf(self): self.spark.catalog.registerFunction("foo_udf", foo_udf) def test_decorator(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType df = self.data @pandas_udf( @@ -176,7 +172,6 @@ def foo(pdf): self.assertPandasEqual(expected, result) def test_coerce(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType df = self.data foo = pandas_udf( @@ -191,7 +186,6 @@ def test_coerce(self): self.assertPandasEqual(expected, result) def test_complex_groupby(self): - from pyspark.sql.functions import pandas_udf, col, PandasUDFType df = self.data @pandas_udf( @@ -210,7 +204,6 @@ def normalize(pdf): self.assertPandasEqual(expected, result) def test_empty_groupby(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType df = self.data @pandas_udf( @@ -229,7 +222,6 @@ def normalize(pdf): self.assertPandasEqual(expected, result) def test_datatype_string(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType df = self.data foo_udf = pandas_udf( @@ -243,8 +235,6 @@ def test_datatype_string(self): self.assertPandasEqual(expected, result) def test_wrong_return_type(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - with QuietTest(self.sc): with self.assertRaisesRegexp( NotImplementedError, @@ -255,7 +245,6 @@ def test_wrong_return_type(self): PandasUDFType.GROUPED_MAP) def test_wrong_args(self): - from pyspark.sql.functions import udf, pandas_udf, sum, PandasUDFType df = self.data with QuietTest(self.sc): @@ -277,9 +266,7 @@ def test_wrong_args(self): pandas_udf(lambda x, y: x, DoubleType(), PandasUDFType.SCALAR)) def test_unsupported_types(self): - from distutils.version import LooseVersion import pyarrow as pa - from pyspark.sql.functions import pandas_udf, PandasUDFType common_err_msg = 'Invalid returnType.*grouped map Pandas UDF.*' unsupported_types = [ @@ -300,7 +287,6 @@ def test_unsupported_types(self): # Regression test for SPARK-23314 def test_timestamp_dst(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType # Daylight saving time for Los Angeles for 2015 is Sun, Nov 1 at 2:00 am dt = [datetime.datetime(2015, 11, 1, 0, 30), datetime.datetime(2015, 11, 1, 1, 30), @@ -311,12 +297,12 @@ def test_timestamp_dst(self): self.assertPandasEqual(df.toPandas(), result.toPandas()) def test_udf_with_key(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType + import numpy as np + df = self.data pdf = df.toPandas() def foo1(key, pdf): - import numpy as np assert type(key) == tuple assert type(key[0]) == np.int64 @@ -326,7 +312,6 @@ def foo1(key, pdf): v4=pdf.v * pdf.id.mean()) def foo2(key, pdf): - import numpy as np assert type(key) == tuple assert type(key[0]) == np.int64 assert 
type(key[1]) == np.int32 @@ -385,9 +370,7 @@ def foo3(key, pdf): self.assertPandasEqual(expected4, result4) def test_column_order(self): - from collections import OrderedDict import pandas as pd - from pyspark.sql.functions import pandas_udf, PandasUDFType # Helper function to set column names from a list def rename_pdf(pdf, names): @@ -468,7 +451,6 @@ def invalid_positional_types(pdf): with QuietTest(self.sc): with self.assertRaisesRegexp(Exception, "KeyError: 'id'"): grouped_df.apply(column_name_typo).collect() - from distutils.version import LooseVersion import pyarrow as pa if LooseVersion(pa.__version__) < LooseVersion("0.11.0"): # TODO: see ARROW-1949. Remove when the minimum PyArrow version becomes 0.11.0. @@ -480,7 +462,6 @@ def invalid_positional_types(pdf): def test_positional_assignment_conf(self): import pandas as pd - from pyspark.sql.functions import pandas_udf, PandasUDFType with self.sql_conf({ "spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName": False}): @@ -496,9 +477,7 @@ def foo(_): self.assertEqual(r.b, 1) def test_self_join_with_pandas(self): - import pyspark.sql.functions as F - - @F.pandas_udf('key long, col string', F.PandasUDFType.GROUPED_MAP) + @pandas_udf('key long, col string', PandasUDFType.GROUPED_MAP) def dummy_pandas_udf(df): return df[['key', 'col']] @@ -508,12 +487,11 @@ def dummy_pandas_udf(df): # this was throwing an AnalysisException before SPARK-24208 res = df_with_pandas.alias('temp0').join(df_with_pandas.alias('temp1'), - F.col('temp0.key') == F.col('temp1.key')) + col('temp0.key') == col('temp1.key')) self.assertEquals(res.count(), 5) def test_mixed_scalar_udfs_followed_by_grouby_apply(self): import pandas as pd - from pyspark.sql.functions import udf, pandas_udf, PandasUDFType df = self.spark.range(0, 10).toDF('v1') df = df.withColumn('v2', udf(lambda x: x + 1, 'int')(df['v1'])) \ diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index 2f585a372598..6a6865a9fb16 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -16,12 +16,20 @@ # import datetime import os +import random import shutil import sys import tempfile import time import unittest +from datetime import date, datetime +from decimal import Decimal +from distutils.version import LooseVersion + +from pyspark.rdd import PythonEvalType +from pyspark.sql import Column +from pyspark.sql.functions import array, col, expr, lit, sum, udf, pandas_udf from pyspark.sql.types import Row from pyspark.sql.types import * from pyspark.sql.utils import AnalysisException @@ -59,18 +67,16 @@ def tearDownClass(cls): @property def nondeterministic_vectorized_udf(self): - from pyspark.sql.functions import pandas_udf + import pandas as pd + import numpy as np @pandas_udf('double') def random_udf(v): - import pandas as pd - import numpy as np return pd.Series(np.random.random(len(v))) random_udf = random_udf.asNondeterministic() return random_udf def test_pandas_udf_tokenize(self): - from pyspark.sql.functions import pandas_udf tokenize = pandas_udf(lambda s: s.apply(lambda str: str.split(' ')), ArrayType(StringType())) self.assertEqual(tokenize.returnType, ArrayType(StringType())) @@ -79,7 +85,6 @@ def test_pandas_udf_tokenize(self): self.assertEqual([Row(hi=[u'hi', u'boo']), Row(hi=[u'bye', u'boo'])], result.collect()) def test_pandas_udf_nested_arrays(self): - from pyspark.sql.functions import pandas_udf tokenize = pandas_udf(lambda s: s.apply(lambda str: 
[str.split(' ')]), ArrayType(ArrayType(StringType()))) self.assertEqual(tokenize.returnType, ArrayType(ArrayType(StringType()))) @@ -88,7 +93,6 @@ def test_pandas_udf_nested_arrays(self): self.assertEqual([Row(hi=[[u'hi', u'boo']]), Row(hi=[[u'bye', u'boo']])], result.collect()) def test_vectorized_udf_basic(self): - from pyspark.sql.functions import pandas_udf, col, array df = self.spark.range(10).select( col('id').cast('string').alias('str'), col('id').cast('int').alias('int'), @@ -114,9 +118,6 @@ def test_vectorized_udf_basic(self): self.assertEquals(df.collect(), res.collect()) def test_register_nondeterministic_vectorized_udf_basic(self): - from pyspark.sql.functions import pandas_udf - from pyspark.rdd import PythonEvalType - import random random_pandas_udf = pandas_udf( lambda x: random.randint(6, 6) + x, IntegerType()).asNondeterministic() self.assertEqual(random_pandas_udf.deterministic, False) @@ -129,7 +130,6 @@ def test_register_nondeterministic_vectorized_udf_basic(self): self.assertEqual(row[0], 7) def test_vectorized_udf_null_boolean(self): - from pyspark.sql.functions import pandas_udf, col data = [(True,), (True,), (None,), (False,)] schema = StructType().add("bool", BooleanType()) df = self.spark.createDataFrame(data, schema) @@ -138,7 +138,6 @@ def test_vectorized_udf_null_boolean(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_null_byte(self): - from pyspark.sql.functions import pandas_udf, col data = [(None,), (2,), (3,), (4,)] schema = StructType().add("byte", ByteType()) df = self.spark.createDataFrame(data, schema) @@ -147,7 +146,6 @@ def test_vectorized_udf_null_byte(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_null_short(self): - from pyspark.sql.functions import pandas_udf, col data = [(None,), (2,), (3,), (4,)] schema = StructType().add("short", ShortType()) df = self.spark.createDataFrame(data, schema) @@ -156,7 +154,6 @@ def test_vectorized_udf_null_short(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_null_int(self): - from pyspark.sql.functions import pandas_udf, col data = [(None,), (2,), (3,), (4,)] schema = StructType().add("int", IntegerType()) df = self.spark.createDataFrame(data, schema) @@ -165,7 +162,6 @@ def test_vectorized_udf_null_int(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_null_long(self): - from pyspark.sql.functions import pandas_udf, col data = [(None,), (2,), (3,), (4,)] schema = StructType().add("long", LongType()) df = self.spark.createDataFrame(data, schema) @@ -174,7 +170,6 @@ def test_vectorized_udf_null_long(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_null_float(self): - from pyspark.sql.functions import pandas_udf, col data = [(3.0,), (5.0,), (-1.0,), (None,)] schema = StructType().add("float", FloatType()) df = self.spark.createDataFrame(data, schema) @@ -183,7 +178,6 @@ def test_vectorized_udf_null_float(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_null_double(self): - from pyspark.sql.functions import pandas_udf, col data = [(3.0,), (5.0,), (-1.0,), (None,)] schema = StructType().add("double", DoubleType()) df = self.spark.createDataFrame(data, schema) @@ -192,8 +186,6 @@ def test_vectorized_udf_null_double(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_null_decimal(self): - from decimal import Decimal - from pyspark.sql.functions import pandas_udf, col data = [(Decimal(3.0),), (Decimal(5.0),), 
(Decimal(-1.0),), (None,)] schema = StructType().add("decimal", DecimalType(38, 18)) df = self.spark.createDataFrame(data, schema) @@ -202,7 +194,6 @@ def test_vectorized_udf_null_decimal(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_null_string(self): - from pyspark.sql.functions import pandas_udf, col data = [("foo",), (None,), ("bar",), ("bar",)] schema = StructType().add("str", StringType()) df = self.spark.createDataFrame(data, schema) @@ -211,7 +202,6 @@ def test_vectorized_udf_null_string(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_string_in_udf(self): - from pyspark.sql.functions import pandas_udf, col import pandas as pd df = self.spark.range(10) str_f = pandas_udf(lambda x: pd.Series(map(str, x)), StringType()) @@ -220,7 +210,6 @@ def test_vectorized_udf_string_in_udf(self): self.assertEquals(expected.collect(), actual.collect()) def test_vectorized_udf_datatype_string(self): - from pyspark.sql.functions import pandas_udf, col df = self.spark.range(10).select( col('id').cast('string').alias('str'), col('id').cast('int').alias('int'), @@ -244,9 +233,8 @@ def test_vectorized_udf_datatype_string(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_null_binary(self): - from distutils.version import LooseVersion import pyarrow as pa - from pyspark.sql.functions import pandas_udf, col + if LooseVersion(pa.__version__) < LooseVersion("0.10.0"): with QuietTest(self.sc): with self.assertRaisesRegexp( @@ -262,7 +250,6 @@ def test_vectorized_udf_null_binary(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_array_type(self): - from pyspark.sql.functions import pandas_udf, col data = [([1, 2],), ([3, 4],)] array_schema = StructType([StructField("array", ArrayType(IntegerType()))]) df = self.spark.createDataFrame(data, schema=array_schema) @@ -271,7 +258,6 @@ def test_vectorized_udf_array_type(self): self.assertEquals(df.collect(), result.collect()) def test_vectorized_udf_null_array(self): - from pyspark.sql.functions import pandas_udf, col data = [([1, 2],), (None,), (None,), ([3, 4],), (None,)] array_schema = StructType([StructField("array", ArrayType(IntegerType()))]) df = self.spark.createDataFrame(data, schema=array_schema) @@ -280,7 +266,6 @@ def test_vectorized_udf_null_array(self): self.assertEquals(df.collect(), result.collect()) def test_vectorized_udf_complex(self): - from pyspark.sql.functions import pandas_udf, col, expr df = self.spark.range(10).select( col('id').cast('int').alias('a'), col('id').cast('int').alias('b'), @@ -293,7 +278,6 @@ def test_vectorized_udf_complex(self): self.assertEquals(expected.collect(), res.collect()) def test_vectorized_udf_exception(self): - from pyspark.sql.functions import pandas_udf, col df = self.spark.range(10) raise_exception = pandas_udf(lambda x: x * (1 / 0), LongType()) with QuietTest(self.sc): @@ -301,8 +285,8 @@ def test_vectorized_udf_exception(self): df.select(raise_exception(col('id'))).collect() def test_vectorized_udf_invalid_length(self): - from pyspark.sql.functions import pandas_udf, col import pandas as pd + df = self.spark.range(10) raise_exception = pandas_udf(lambda _: pd.Series(1), LongType()) with QuietTest(self.sc): @@ -312,7 +296,6 @@ def test_vectorized_udf_invalid_length(self): df.select(raise_exception(col('id'))).collect() def test_vectorized_udf_chained(self): - from pyspark.sql.functions import pandas_udf, col df = self.spark.range(10) f = pandas_udf(lambda x: x + 1, LongType()) g = 
pandas_udf(lambda x: x - 1, LongType()) @@ -320,7 +303,6 @@ def test_vectorized_udf_chained(self): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_wrong_return_type(self): - from pyspark.sql.functions import pandas_udf with QuietTest(self.sc): with self.assertRaisesRegexp( NotImplementedError, @@ -328,7 +310,6 @@ def test_vectorized_udf_wrong_return_type(self): pandas_udf(lambda x: x * 1.0, MapType(LongType(), LongType())) def test_vectorized_udf_return_scalar(self): - from pyspark.sql.functions import pandas_udf, col df = self.spark.range(10) f = pandas_udf(lambda x: 1.0, DoubleType()) with QuietTest(self.sc): @@ -336,7 +317,6 @@ def test_vectorized_udf_return_scalar(self): df.select(f(col('id'))).collect() def test_vectorized_udf_decorator(self): - from pyspark.sql.functions import pandas_udf, col df = self.spark.range(10) @pandas_udf(returnType=LongType()) @@ -346,21 +326,18 @@ def identity(x): self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_empty_partition(self): - from pyspark.sql.functions import pandas_udf, col df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2)) f = pandas_udf(lambda x: x, LongType()) res = df.select(f(col('id'))) self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_varargs(self): - from pyspark.sql.functions import pandas_udf, col df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2)) f = pandas_udf(lambda *v: v[0], LongType()) res = df.select(f(col('id'))) self.assertEquals(df.collect(), res.collect()) def test_vectorized_udf_unsupported_types(self): - from pyspark.sql.functions import pandas_udf with QuietTest(self.sc): with self.assertRaisesRegexp( NotImplementedError, @@ -368,8 +345,6 @@ def test_vectorized_udf_unsupported_types(self): pandas_udf(lambda x: x, MapType(StringType(), IntegerType())) def test_vectorized_udf_dates(self): - from pyspark.sql.functions import pandas_udf, col - from datetime import date schema = StructType().add("idx", LongType()).add("date", DateType()) data = [(0, date(1969, 1, 1),), (1, date(2012, 2, 2),), @@ -405,8 +380,6 @@ def check_data(idx, date, date_copy): self.assertIsNone(result[i][3]) # "check_data" col def test_vectorized_udf_timestamps(self): - from pyspark.sql.functions import pandas_udf, col - from datetime import datetime schema = StructType([ StructField("idx", LongType(), True), StructField("timestamp", TimestampType(), True)]) @@ -447,8 +420,8 @@ def check_data(idx, timestamp, timestamp_copy): self.assertIsNone(result[i][3]) # "check_data" col def test_vectorized_udf_return_timestamp_tz(self): - from pyspark.sql.functions import pandas_udf, col import pandas as pd + df = self.spark.range(10) @pandas_udf(returnType=TimestampType()) @@ -465,8 +438,8 @@ def gen_timestamps(id): self.assertEquals(expected, ts) def test_vectorized_udf_check_config(self): - from pyspark.sql.functions import pandas_udf, col import pandas as pd + with self.sql_conf({"spark.sql.execution.arrow.maxRecordsPerBatch": 3}): df = self.spark.range(10, numPartitions=1) @@ -479,9 +452,8 @@ def check_records_per_batch(x): self.assertTrue(r <= 3) def test_vectorized_udf_timestamps_respect_session_timezone(self): - from pyspark.sql.functions import pandas_udf, col - from datetime import datetime import pandas as pd + schema = StructType([ StructField("idx", LongType(), True), StructField("timestamp", TimestampType(), True)]) @@ -519,8 +491,6 @@ def test_vectorized_udf_timestamps_respect_session_timezone(self): def test_nondeterministic_vectorized_udf(self): # 
Test that nondeterministic UDFs are evaluated only once in chained UDF evaluations - from pyspark.sql.functions import pandas_udf, col - @pandas_udf('double') def plus_ten(v): return v + 10 @@ -533,8 +503,6 @@ def plus_ten(v): self.assertTrue(result1['plus_ten(rand)'].equals(result1['rand'] + 10)) def test_nondeterministic_vectorized_udf_in_aggregate(self): - from pyspark.sql.functions import sum - df = self.spark.range(10) random_udf = self.nondeterministic_vectorized_udf @@ -545,8 +513,6 @@ def test_nondeterministic_vectorized_udf_in_aggregate(self): df.agg(sum(random_udf(df.id))).collect() def test_register_vectorized_udf_basic(self): - from pyspark.rdd import PythonEvalType - from pyspark.sql.functions import pandas_udf, col, expr df = self.spark.range(10).select( col('id').cast('int').alias('a'), col('id').cast('int').alias('b')) @@ -563,11 +529,10 @@ def test_register_vectorized_udf_basic(self): # Regression test for SPARK-23314 def test_timestamp_dst(self): - from pyspark.sql.functions import pandas_udf # Daylight saving time for Los Angeles for 2015 is Sun, Nov 1 at 2:00 am - dt = [datetime.datetime(2015, 11, 1, 0, 30), - datetime.datetime(2015, 11, 1, 1, 30), - datetime.datetime(2015, 11, 1, 2, 30)] + dt = [datetime(2015, 11, 1, 0, 30), + datetime(2015, 11, 1, 1, 30), + datetime(2015, 11, 1, 2, 30)] df = self.spark.createDataFrame(dt, 'timestamp').toDF('time') foo_udf = pandas_udf(lambda x: x, 'timestamp') result = df.withColumn('time', foo_udf(df.time)) @@ -593,7 +558,6 @@ def test_type_annotation(self): def test_mixed_udf(self): import pandas as pd - from pyspark.sql.functions import col, udf, pandas_udf df = self.spark.range(0, 1).toDF('v') @@ -696,8 +660,6 @@ def f4(x): def test_mixed_udf_and_sql(self): import pandas as pd - from pyspark.sql import Column - from pyspark.sql.functions import udf, pandas_udf df = self.spark.range(0, 1).toDF('v') @@ -758,7 +720,6 @@ def test_datasource_with_udf(self): # This needs to a separate test because Arrow dependency is optional import pandas as pd import numpy as np - from pyspark.sql.functions import pandas_udf, lit, col path = tempfile.mkdtemp() shutil.rmtree(path) diff --git a/python/pyspark/sql/tests/test_pandas_udf_window.py b/python/pyspark/sql/tests/test_pandas_udf_window.py index f0e6d2696df6..0a7a19c1c081 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_window.py +++ b/python/pyspark/sql/tests/test_pandas_udf_window.py @@ -18,6 +18,8 @@ import unittest from pyspark.sql.utils import AnalysisException +from pyspark.sql.functions import array, explode, col, lit, mean, min, max, rank, \ + udf, pandas_udf, PandasUDFType from pyspark.sql.window import Window from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message @@ -30,7 +32,6 @@ class WindowPandasUDFTests(ReusedSQLTestCase): @property def data(self): - from pyspark.sql.functions import array, explode, col, lit return self.spark.range(10).toDF('id') \ .withColumn("vs", array([lit(i * 1.0) + col('id') for i in range(20, 30)])) \ .withColumn("v", explode(col('vs'))) \ @@ -39,18 +40,14 @@ def data(self): @property def python_plus_one(self): - from pyspark.sql.functions import udf return udf(lambda v: v + 1, 'double') @property def pandas_scalar_time_two(self): - from pyspark.sql.functions import pandas_udf return pandas_udf(lambda v: v * 2, 'double') @property def pandas_agg_mean_udf(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - @pandas_udf('double', 
PandasUDFType.GROUPED_AGG) def avg(v): return v.mean() @@ -58,8 +55,6 @@ def avg(v): @property def pandas_agg_max_udf(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - @pandas_udf('double', PandasUDFType.GROUPED_AGG) def max(v): return v.max() @@ -67,8 +62,6 @@ def max(v): @property def pandas_agg_min_udf(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - @pandas_udf('double', PandasUDFType.GROUPED_AGG) def min(v): return v.min() @@ -88,8 +81,6 @@ def unpartitioned_window(self): return Window.partitionBy() def test_simple(self): - from pyspark.sql.functions import mean - df = self.data w = self.unbounded_window @@ -105,8 +96,6 @@ def test_simple(self): self.assertPandasEqual(expected2.toPandas(), result2.toPandas()) def test_multiple_udfs(self): - from pyspark.sql.functions import max, min, mean - df = self.data w = self.unbounded_window @@ -121,8 +110,6 @@ def test_multiple_udfs(self): self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) def test_replace_existing(self): - from pyspark.sql.functions import mean - df = self.data w = self.unbounded_window @@ -132,8 +119,6 @@ def test_replace_existing(self): self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) def test_mixed_sql(self): - from pyspark.sql.functions import mean - df = self.data w = self.unbounded_window mean_udf = self.pandas_agg_mean_udf @@ -144,8 +129,6 @@ def test_mixed_sql(self): self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) def test_mixed_udf(self): - from pyspark.sql.functions import mean - df = self.data w = self.unbounded_window @@ -171,8 +154,6 @@ def test_mixed_udf(self): self.assertPandasEqual(expected2.toPandas(), result2.toPandas()) def test_without_partitionBy(self): - from pyspark.sql.functions import mean - df = self.data w = self.unpartitioned_window mean_udf = self.pandas_agg_mean_udf @@ -187,8 +168,6 @@ def test_without_partitionBy(self): self.assertPandasEqual(expected2.toPandas(), result2.toPandas()) def test_mixed_sql_and_udf(self): - from pyspark.sql.functions import max, min, rank, col - df = self.data w = self.unbounded_window ow = self.ordered_window @@ -221,8 +200,6 @@ def test_mixed_sql_and_udf(self): self.assertPandasEqual(expected4.toPandas(), result4.toPandas()) def test_array_type(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - df = self.data w = self.unbounded_window @@ -231,8 +208,6 @@ def test_array_type(self): self.assertEquals(result1.first()['v2'], [1.0, 2.0]) def test_invalid_args(self): - from pyspark.sql.functions import pandas_udf, PandasUDFType - df = self.data w = self.unbounded_window ow = self.ordered_window From 9c481c7a6b8019c569a08b2645bf9c19ff84a9e5 Mon Sep 17 00:00:00 2001 From: jasonwayne Date: Fri, 14 Dec 2018 10:47:58 +0800 Subject: [PATCH 0133/1072] [SPARK-26360] remove redundant validateQuery call ## What changes were proposed in this pull request? remove a redundant `KafkaWriter.validateQuery` call in `KafkaSourceProvider ` ## How was this patch tested? Just removing duplicate codes, so I just build and run unit tests. Closes #23309 from JasonWayne/SPARK-26360. 
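A hedged reading of why the call is redundant: the same validation presumably runs again when the streaming write support is constructed. The PR text only states the redundancy, so that reading, and the class below, are assumptions; it is a generic sketch of keeping validation in the constructor rather than at call sites, not the Kafka connector's API.

```scala
// Illustrative only. Validation lives in the constructor, so every construction
// path is checked exactly once and call sites cannot duplicate or forget it.
final class StreamingWriteSupport(topic: Option[String], params: Map[String, String]) {
  require(topic.isDefined || params.contains("topic"), "topic must be specified")
  // ... create writer factories here ...
}

// Call sites no longer need a separate validate() before `new`:
val support = new StreamingWriteSupport(Some("events"), Map.empty)
```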
Authored-by: jasonwayne Signed-off-by: Hyukjin Kwon --- .../org/apache/spark/sql/kafka010/KafkaSourceProvider.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index 6a0c2088ac3d..4b8b5c0019b4 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -266,8 +266,6 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister // We convert the options argument from V2 -> Java map -> scala mutable -> scala immutable. val producerParams = kafkaParamsForProducer(options.asMap.asScala.toMap) - KafkaWriter.validateQuery(schema.toAttributes, producerParams, topic) - new KafkaStreamingWriteSupport(topic, producerParams, schema) } From 93139afb072d14870fb4eab01cb11df28eb0f8dd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 14 Dec 2018 10:50:48 +0800 Subject: [PATCH 0134/1072] [SPARK-26337][SQL][TEST] Add benchmark for LongToUnsafeRowMap ## What changes were proposed in this pull request? Regarding the performance issue of SPARK-26155, it reports the issue on TPC-DS. I think it is better to add a benchmark for `LongToUnsafeRowMap` which is the root cause of performance regression. It can be easier to show performance difference between different metric implementations in `LongToUnsafeRowMap`. ## How was this patch tested? Manually run added benchmark. Closes #23284 from viirya/SPARK-26337. Authored-by: Liang-Chi Hsieh Signed-off-by: Wenchen Fan --- ...HashedRelationMetricsBenchmark-results.txt | 11 +++ .../HashedRelationMetricsBenchmark.scala | 84 +++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala diff --git a/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt b/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt new file mode 100644 index 000000000000..338244ad542f --- /dev/null +++ b/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt @@ -0,0 +1,11 @@ +================================================================================================ +LongToUnsafeRowMap metrics +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_181-b13 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz +LongToUnsafeRowMap metrics: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +LongToUnsafeRowMap 234 / 315 2.1 467.3 1.0X + + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala new file mode 100644 index 000000000000..bdf753debe62 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.spark.SparkConf +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.internal.config.MEMORY_OFFHEAP_ENABLED +import org.apache.spark.memory.{StaticMemoryManager, TaskMemoryManager} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{BoundReference, UnsafeProjection} +import org.apache.spark.sql.execution.joins.LongToUnsafeRowMap +import org.apache.spark.sql.types.LongType + +/** + * Benchmark to measure metrics performance at HashedRelation. + * To run this benchmark: + * {{{ + * 1. without sbt: bin/spark-submit --class + * 2. build/sbt "sql/test:runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to "benchmarks/HashedRelationMetricsBenchmark-results.txt". + * }}} + */ +object HashedRelationMetricsBenchmark extends SqlBasedBenchmark { + + def benchmarkLongToUnsafeRowMapMetrics(numRows: Int): Unit = { + runBenchmark("LongToUnsafeRowMap metrics") { + val benchmark = new Benchmark("LongToUnsafeRowMap metrics", numRows, output = output) + benchmark.addCase("LongToUnsafeRowMap") { iter => + val taskMemoryManager = new TaskMemoryManager( + new StaticMemoryManager( + new SparkConf().set(MEMORY_OFFHEAP_ENABLED.key, "false"), + Long.MaxValue, + Long.MaxValue, + 1), + 0) + val unsafeProj = UnsafeProjection.create(Seq(BoundReference(0, LongType, false))) + + val keys = Range.Long(0, numRows, 1) + val map = new LongToUnsafeRowMap(taskMemoryManager, 1) + keys.foreach { k => + map.append(k, unsafeProj(InternalRow(k))) + } + map.optimize() + + val threads = (0 to 100).map { _ => + val thread = new Thread { + override def run: Unit = { + val row = unsafeProj(InternalRow(0L)).copy() + keys.foreach { k => + assert(map.getValue(k, row) eq row) + assert(row.getLong(0) == k) + } + } + } + thread.start() + thread + } + threads.map(_.join()) + map.free() + } + benchmark.run() + } + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + benchmarkLongToUnsafeRowMapMetrics(500000) + } +} From 2d8838dccde6d77b4ff1a15fdd6a0d4da2fda8c7 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 13 Dec 2018 20:55:12 -0800 Subject: [PATCH 0135/1072] [SPARK-26368][SQL] Make it clear that getOrInferFileFormatSchema doesn't create InMemoryFileIndex ## What changes were proposed in this pull request? I was looking at the code and it was a bit difficult to see the life cycle of InMemoryFileIndex passed into getOrInferFileFormatSchema, because once it is passed in, and another time it was created in getOrInferFileFormatSchema. It'd be easier to understand the life cycle if we move the creation of it out. ## How was this patch tested? This is a simple code move and should be covered by existing tests. 
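The shape of the refactor is a common one: the caller passes a thunk and the callee binds it to a `lazy val`, so the expensive `InMemoryFileIndex` is only built if schema inference actually needs it. The sketch below uses illustrative stand-in names, not Spark APIs.

```scala
// Illustrative only: `buildIndex` stands in for () => InMemoryFileIndex.
def getOrInferSchema(
    userSchema: Option[Seq[String]],
    buildIndex: () => Seq[String]): Seq[String] = {
  lazy val listing = buildIndex()   // evaluated at most once, and only if touched
  userSchema.getOrElse(listing)     // file listing happens only when we must infer
}

// The expensive work is now clearly owned by the caller and skipped when unused:
getOrInferSchema(Some(Seq("a", "b")), () => { println("listing files"); Seq("c") })
// prints nothing, because the thunk is never invoked
```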
Closes #23317 from rxin/SPARK-26368. Authored-by: Reynold Xin Signed-off-by: gatorsmile --- .../execution/datasources/DataSource.scala | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 795a6d0b6b04..fefff68c4ba8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -122,21 +122,14 @@ case class DataSource( * be any further inference in any triggers. * * @param format the file format object for this DataSource - * @param fileIndex optional [[InMemoryFileIndex]] for getting partition schema and file list + * @param getFileIndex [[InMemoryFileIndex]] for getting partition schema and file list * @return A pair of the data schema (excluding partition columns) and the schema of the partition * columns. */ private def getOrInferFileFormatSchema( format: FileFormat, - fileIndex: Option[InMemoryFileIndex] = None): (StructType, StructType) = { - // The operations below are expensive therefore try not to do them if we don't need to, e.g., - // in streaming mode, we have already inferred and registered partition columns, we will - // never have to materialize the lazy val below - lazy val tempFileIndex = fileIndex.getOrElse { - val globbedPaths = - checkAndGlobPathIfNecessary(checkEmptyGlobPath = false, checkFilesExist = false) - createInMemoryFileIndex(globbedPaths) - } + getFileIndex: () => InMemoryFileIndex): (StructType, StructType) = { + lazy val tempFileIndex = getFileIndex() val partitionSchema = if (partitionColumns.isEmpty) { // Try to infer partitioning, because no DataSource in the read path provides the partitioning @@ -236,7 +229,15 @@ case class DataSource( "you may be able to create a static DataFrame on that directory with " + "'spark.read.load(directory)' and infer schema from it.") } - val (dataSchema, partitionSchema) = getOrInferFileFormatSchema(format) + + val (dataSchema, partitionSchema) = getOrInferFileFormatSchema(format, () => { + // The operations below are expensive therefore try not to do them if we don't need to, + // e.g., in streaming mode, we have already inferred and registered partition columns, + // we will never have to materialize the lazy val below + val globbedPaths = + checkAndGlobPathIfNecessary(checkEmptyGlobPath = false, checkFilesExist = false) + createInMemoryFileIndex(globbedPaths) + }) SourceInfo( s"FileSource[$path]", StructType(dataSchema ++ partitionSchema), @@ -370,7 +371,7 @@ case class DataSource( } else { val index = createInMemoryFileIndex(globbedPaths) val (resultDataSchema, resultPartitionSchema) = - getOrInferFileFormatSchema(format, Some(index)) + getOrInferFileFormatSchema(format, () => index) (index, resultDataSchema, resultPartitionSchema) } From 3dda58af2b7f42beab736d856bf17b4d35c8866c Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sat, 15 Dec 2018 00:23:28 +0800 Subject: [PATCH 0136/1072] [SPARK-26370][SQL] Fix resolution of higher-order function for the same identifier. ## What changes were proposed in this pull request? 
When using a higher-order function with the same variable name as the existing columns in `Filter` or something which uses `Analyzer.resolveExpressionBottomUp` during the resolution, e.g.,: ```scala val df = Seq( (Seq(1, 9, 8, 7), 1, 2), (Seq(5, 9, 7), 2, 2), (Seq.empty, 3, 2), (null, 4, 2) ).toDF("i", "x", "d") checkAnswer(df.filter("exists(i, x -> x % d == 0)"), Seq(Row(Seq(1, 9, 8, 7), 1, 2))) checkAnswer(df.select("x").filter("exists(i, x -> x % d == 0)"), Seq(Row(1))) ``` the following exception happens: ``` java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.BoundReference cannot be cast to org.apache.spark.sql.catalyst.expressions.NamedExpression at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:237) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at scala.collection.TraversableLike.map(TraversableLike.scala:237) at scala.collection.TraversableLike.map$(TraversableLike.scala:230) at scala.collection.AbstractTraversable.map(Traversable.scala:108) at org.apache.spark.sql.catalyst.expressions.HigherOrderFunction.$anonfun$functionsForEval$1(higherOrderFunctions.scala:147) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:237) at scala.collection.immutable.List.foreach(List.scala:392) at scala.collection.TraversableLike.map(TraversableLike.scala:237) at scala.collection.TraversableLike.map$(TraversableLike.scala:230) at scala.collection.immutable.List.map(List.scala:298) at org.apache.spark.sql.catalyst.expressions.HigherOrderFunction.functionsForEval(higherOrderFunctions.scala:145) at org.apache.spark.sql.catalyst.expressions.HigherOrderFunction.functionsForEval$(higherOrderFunctions.scala:145) at org.apache.spark.sql.catalyst.expressions.ArrayExists.functionsForEval$lzycompute(higherOrderFunctions.scala:369) at org.apache.spark.sql.catalyst.expressions.ArrayExists.functionsForEval(higherOrderFunctions.scala:369) at org.apache.spark.sql.catalyst.expressions.SimpleHigherOrderFunction.functionForEval(higherOrderFunctions.scala:176) at org.apache.spark.sql.catalyst.expressions.SimpleHigherOrderFunction.functionForEval$(higherOrderFunctions.scala:176) at org.apache.spark.sql.catalyst.expressions.ArrayExists.functionForEval(higherOrderFunctions.scala:369) at org.apache.spark.sql.catalyst.expressions.ArrayExists.nullSafeEval(higherOrderFunctions.scala:387) at org.apache.spark.sql.catalyst.expressions.SimpleHigherOrderFunction.eval(higherOrderFunctions.scala:190) at org.apache.spark.sql.catalyst.expressions.SimpleHigherOrderFunction.eval$(higherOrderFunctions.scala:185) at org.apache.spark.sql.catalyst.expressions.ArrayExists.eval(higherOrderFunctions.scala:369) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificPredicate.eval(Unknown Source) at org.apache.spark.sql.execution.FilterExec.$anonfun$doExecute$3(basicPhysicalOperators.scala:216) at org.apache.spark.sql.execution.FilterExec.$anonfun$doExecute$3$adapted(basicPhysicalOperators.scala:215) ... ``` because the `UnresolvedAttribute`s in `LambdaFunction` are unexpectedly resolved by the rule. This pr modified to use a placeholder `UnresolvedNamedLambdaVariable` to prevent unexpected resolution. ## How was this patch tested? Added a test and modified some tests. Closes #23320 from ueshin/issues/SPARK-26370/hof_resolution. 
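A rough sketch of the mechanism, simplified from `AstBuilder.visitLambda` in the diff below and not taken from the PR's own tests: the parser now wraps every attribute inside a lambda body in the new placeholder class, leaving it to `ResolveLambdaVariables` to decide what each name means.

```scala
import org.apache.spark.sql.catalyst.expressions._

// Roughly what is built for the lambda in "exists(i, x -> x % d == 0)":
val x = UnresolvedNamedLambdaVariable(Seq("x"))
val d = UnresolvedNamedLambdaVariable(Seq("d"))
val lambda = LambdaFunction(EqualTo(Remainder(x, d), Literal(0)), arguments = Seq(x))
// ResolveLambdaVariables later binds `x` to the enclosing lambda argument and turns
// the unmatched `d` back into an UnresolvedAttribute, so `x` can no longer be
// captured by a column of the same name during generic bottom-up resolution.
```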
Authored-by: Takuya UESHIN Signed-off-by: Wenchen Fan --- .../analysis/higherOrderFunctions.scala | 5 ++-- .../expressions/higherOrderFunctions.scala | 26 +++++++++++++++++-- .../sql/catalyst/parser/AstBuilder.scala | 7 +++-- .../ResolveLambdaVariablesSuite.scala | 10 ++++--- ...ReplaceNullWithFalseInPredicateSuite.scala | 14 +++++----- .../parser/ExpressionParserSuite.scala | 6 +++-- .../typeCoercion/native/mapZipWith.sql.out | 4 +-- .../spark/sql/DataFrameFunctionsSuite.scala | 20 ++++++++++++++ 8 files changed, 72 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index a8a7bbd9f9cd..1cd7f412bb67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -150,13 +150,14 @@ case class ResolveLambdaVariables(conf: SQLConf) extends Rule[LogicalPlan] { val lambdaMap = l.arguments.map(v => canonicalizer(v.name) -> v).toMap l.mapChildren(resolve(_, parentLambdaMap ++ lambdaMap)) - case u @ UnresolvedAttribute(name +: nestedFields) => + case u @ UnresolvedNamedLambdaVariable(name +: nestedFields) => parentLambdaMap.get(canonicalizer(name)) match { case Some(lambda) => nestedFields.foldLeft(lambda: Expression) { (expr, fieldName) => ExtractValue(expr, Literal(fieldName), conf.resolver) } - case None => u + case None => + UnresolvedAttribute(u.nameParts) } case _ => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index a8639d29f964..7141b6e99638 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -22,12 +22,34 @@ import java.util.concurrent.atomic.AtomicReference import scala.collection.mutable import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedAttribute, UnresolvedException} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.array.ByteArrayMethods +/** + * A placeholder of lambda variables to prevent unexpected resolution of [[LambdaFunction]]. 
+ */ +case class UnresolvedNamedLambdaVariable(nameParts: Seq[String]) + extends LeafExpression with NamedExpression with Unevaluable { + + override def name: String = + nameParts.map(n => if (n.contains(".")) s"`$n`" else n).mkString(".") + + override def exprId: ExprId = throw new UnresolvedException(this, "exprId") + override def dataType: DataType = throw new UnresolvedException(this, "dataType") + override def nullable: Boolean = throw new UnresolvedException(this, "nullable") + override def qualifier: Seq[String] = throw new UnresolvedException(this, "qualifier") + override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute") + override def newInstance(): NamedExpression = throw new UnresolvedException(this, "newInstance") + override lazy val resolved = false + + override def toString: String = s"lambda '$name" + + override def sql: String = name +} + /** * A named lambda variable. */ @@ -79,7 +101,7 @@ case class LambdaFunction( object LambdaFunction { val identity: LambdaFunction = { - val id = UnresolvedAttribute.quoted("id") + val id = UnresolvedNamedLambdaVariable(Seq("id")) LambdaFunction(id, Seq(id)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 672bffcfc0ca..8959f78b656d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1338,9 +1338,12 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitLambda(ctx: LambdaContext): Expression = withOrigin(ctx) { val arguments = ctx.IDENTIFIER().asScala.map { name => - UnresolvedAttribute.quoted(name.getText) + UnresolvedNamedLambdaVariable(UnresolvedAttribute.quoted(name.getText).nameParts) } - LambdaFunction(expression(ctx.expression), arguments) + val function = expression(ctx.expression).transformUp { + case a: UnresolvedAttribute => UnresolvedNamedLambdaVariable(a.nameParts) + } + LambdaFunction(function, arguments) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala index c4171c75ecd0..a5847ba7c522 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala @@ -49,19 +49,21 @@ class ResolveLambdaVariablesSuite extends PlanTest { comparePlans(Analyzer.execute(plan(e1)), plan(e2)) } + private def lv(s: Symbol) = UnresolvedNamedLambdaVariable(Seq(s.name)) + test("resolution - no op") { checkExpression(key, key) } test("resolution - simple") { - val in = ArrayTransform(values1, LambdaFunction('x.attr + 1, 'x.attr :: Nil)) + val in = ArrayTransform(values1, LambdaFunction(lv('x) + 1, lv('x) :: Nil)) val out = ArrayTransform(values1, LambdaFunction(lvInt + 1, lvInt :: Nil)) checkExpression(in, out) } test("resolution - nested") { val in = ArrayTransform(values2, LambdaFunction( - ArrayTransform('x.attr, LambdaFunction('x.attr + 1, 'x.attr :: Nil)), 'x.attr :: Nil)) + ArrayTransform(lv('x), LambdaFunction(lv('x) + 1, lv('x) :: Nil)), lv('x) :: Nil)) val out = ArrayTransform(values2, LambdaFunction( ArrayTransform(lvArray, LambdaFunction(lvInt + 1, lvInt :: Nil)), 
lvArray :: Nil)) checkExpression(in, out) @@ -75,14 +77,14 @@ class ResolveLambdaVariablesSuite extends PlanTest { test("fail - name collisions") { val p = plan(ArrayTransform(values1, - LambdaFunction('x.attr + 'X.attr, 'x.attr :: 'X.attr :: Nil))) + LambdaFunction(lv('x) + lv('X), lv('x) :: lv('X) :: Nil))) val msg = intercept[AnalysisException](Analyzer.execute(p)).getMessage assert(msg.contains("arguments should not have names that are semantically the same")) } test("fail - lambda arguments") { val p = plan(ArrayTransform(values1, - LambdaFunction('x.attr + 'y.attr + 'z.attr, 'x.attr :: 'y.attr :: 'z.attr :: Nil))) + LambdaFunction(lv('x) + lv('y) + lv('z), lv('x) :: lv('y) :: lv('z) :: Nil))) val msg = intercept[AnalysisException](Analyzer.execute(p)).getMessage assert(msg.contains("does not match the number of arguments expected")) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index ee0d04da3e46..748075bfd6a6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, Literal, MapFilter, NamedExpression, Or} +import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} @@ -306,22 +306,24 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testProjection(originalExpr = column, expectedExpr = column) } + private def lv(s: Symbol) = UnresolvedNamedLambdaVariable(Seq(s.name)) + test("replace nulls in lambda function of ArrayFilter") { - testHigherOrderFunc('a, ArrayFilter, Seq('e)) + testHigherOrderFunc('a, ArrayFilter, Seq(lv('e))) } test("replace nulls in lambda function of ArrayExists") { - testHigherOrderFunc('a, ArrayExists, Seq('e)) + testHigherOrderFunc('a, ArrayExists, Seq(lv('e))) } test("replace nulls in lambda function of MapFilter") { - testHigherOrderFunc('m, MapFilter, Seq('k, 'v)) + testHigherOrderFunc('m, MapFilter, Seq(lv('k), lv('v))) } test("inability to replace nulls in arbitrary higher-order function") { val lambdaFunc = LambdaFunction( - function = If('e > 0, Literal(null, BooleanType), TrueLiteral), - arguments = Seq[NamedExpression]('e)) + function = If(lv('e) > 0, Literal(null, BooleanType), TrueLiteral), + arguments = Seq[NamedExpression](lv('e))) val column = ArrayTransform('a, lambdaFunc) testProjection(originalExpr = column, expectedExpr = column) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index b4df22c5b29f..8bcc69d580d8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -246,9 +246,11 @@ class ExpressionParserSuite extends PlanTest { intercept("foo(a x)", "extraneous input 'x'") } + private def lv(s: Symbol) = UnresolvedNamedLambdaVariable(Seq(s.name)) + test("lambda functions") { - assertEqual("x -> x + 1", LambdaFunction('x + 1, Seq('x.attr))) - assertEqual("(x, y) -> x + y", LambdaFunction('x + 'y, Seq('x.attr, 'y.attr))) + assertEqual("x -> x + 1", LambdaFunction(lv('x) + 1, Seq(lv('x)))) + assertEqual("(x, y) -> x + y", LambdaFunction(lv('x) + lv('y), Seq(lv('x), lv('y)))) } test("window function expressions") { diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out index 35740094ba53..86a578ca013d 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out @@ -85,7 +85,7 @@ FROM various_maps struct<> -- !query 5 output org.apache.spark.sql.AnalysisException -cannot resolve 'map_zip_with(various_maps.`decimal_map1`, various_maps.`decimal_map2`, lambdafunction(named_struct(NamePlaceholder(), `k`, NamePlaceholder(), `v1`, NamePlaceholder(), `v2`), `k`, `v1`, `v2`))' due to argument data type mismatch: The input to function map_zip_with should have been two maps with compatible key types, but the key types are [decimal(36,0), decimal(36,35)].; line 1 pos 7 +cannot resolve 'map_zip_with(various_maps.`decimal_map1`, various_maps.`decimal_map2`, lambdafunction(named_struct(NamePlaceholder(), k, NamePlaceholder(), v1, NamePlaceholder(), v2), k, v1, v2))' due to argument data type mismatch: The input to function map_zip_with should have been two maps with compatible key types, but the key types are [decimal(36,0), decimal(36,35)].; line 1 pos 7 -- !query 6 @@ -113,7 +113,7 @@ FROM various_maps struct<> -- !query 8 output org.apache.spark.sql.AnalysisException -cannot resolve 'map_zip_with(various_maps.`decimal_map2`, various_maps.`int_map`, lambdafunction(named_struct(NamePlaceholder(), `k`, NamePlaceholder(), `v1`, NamePlaceholder(), `v2`), `k`, `v1`, `v2`))' due to argument data type mismatch: The input to function map_zip_with should have been two maps with compatible key types, but the key types are [decimal(36,35), int].; line 1 pos 7 +cannot resolve 'map_zip_with(various_maps.`decimal_map2`, various_maps.`int_map`, lambdafunction(named_struct(NamePlaceholder(), k, NamePlaceholder(), v1, NamePlaceholder(), v2), k, v1, v2))' due to argument data type mismatch: The input to function map_zip_with should have been two maps with compatible key types, but the key types are [decimal(36,35), int].; line 1 pos 7 -- !query 9 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index e6d1a038a591..b7fc9570af91 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -2908,6 +2908,26 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { } 
assert(ex.getMessage.contains("Cannot use null as map key")) } + + test("SPARK-26370: Fix resolution of higher-order function for the same identifier") { + val df = Seq( + (Seq(1, 9, 8, 7), 1, 2), + (Seq(5, 9, 7), 2, 2), + (Seq.empty, 3, 2), + (null, 4, 2) + ).toDF("i", "x", "d") + + checkAnswer(df.selectExpr("x", "exists(i, x -> x % d == 0)"), + Seq( + Row(1, true), + Row(2, false), + Row(3, false), + Row(4, null))) + checkAnswer(df.filter("exists(i, x -> x % d == 0)"), + Seq(Row(Seq(1, 9, 8, 7), 1, 2))) + checkAnswer(df.select("x").filter("exists(i, x -> x % d == 0)"), + Seq(Row(1))) + } } object DataFrameFunctionsSuite { From d25e443eec6efc9172eade6ac11be7b3ff04759d Mon Sep 17 00:00:00 2001 From: CarolinPeng <00244106@zte.intra> Date: Fri, 14 Dec 2018 14:23:21 -0600 Subject: [PATCH 0137/1072] [MINOR][SQL] Some errors in the notes. ## What changes were proposed in this pull request? When using ordinals to access a linked list, the time cost is O(n). ## How was this patch tested? Existing tests. Closes #23280 from CarolinePeng/update_Two. Authored-by: CarolinPeng <00244106@zte.intra> Signed-off-by: Sean Owen --- .../org/apache/spark/sql/catalyst/expressions/package.scala | 2 +- .../apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala index 0083ee64653e..bf18e8bcb52d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala @@ -101,7 +101,7 @@ package object expressions { StructType(attrs.map(a => StructField(a.name, a.dataType, a.nullable, a.metadata))) } - // It's possible that `attrs` is a linked list, which can lead to bad O(n^2) loops when + // It's possible that `attrs` is a linked list, which can lead to bad O(n) loops when // accessing attributes by their ordinals. To avoid this performance penalty, convert the input // to an array. @transient private lazy val attrsArray = attrs.toArray diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index a520eba001af..3ad2ee692361 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -93,7 +93,7 @@ abstract class LogicalPlan /** * Optionally resolves the given strings to a [[NamedExpression]] using the input from all child * nodes of this LogicalPlan. The attribute is expressed as - * as string in the following form: `[scope].AttributeName.[nested].[fields]...`. + * string in the following form: `[scope].AttributeName.[nested].[fields]...`. */ def resolveChildren( nameParts: Seq[String], From 1b604c1fd0b9ef17b394818fbd6c546bc01cdd8c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 15 Dec 2018 13:52:07 +0800 Subject: [PATCH 0138/1072] [SPARK-26265][CORE][FOLLOWUP] Put freePage into a finally block ## What changes were proposed in this pull request? Based on the [comment](https://github.com/apache/spark/pull/23272#discussion_r240735509), it seems to be better to put `freePage` into a `finally` block. This patch is a follow-up to do so. ## How was this patch tested? Existing tests.
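The resulting shape of the change is roughly the following (a simplified, self-contained Scala sketch with stand-in names, not the actual `BytesToBytesMap` Java code): the page to free is only remembered while the lock is held, and the actual `freePage` call happens in `finally`, after the lock is released, even if the body throws.

```scala
// Self-contained sketch of the pattern; Page, freePage and the fields are stand-ins.
object FreePageInFinallySketch extends App {
  final class Page(val id: Int)

  private val lock = new Object
  private var currentPage: Page = new Page(0)

  private def freePage(p: Page): Unit = println(s"freed page ${p.id}") // stand-in for real freeing

  def advanceToNextPage(destructive: Boolean): Unit = {
    var pageToFree: Page = null
    try {
      lock.synchronized {
        if (destructive && currentPage != null) {
          pageToFree = currentPage // remember it, but do not free while holding the lock
          currentPage = null
        }
        // ... advance to the next page; this part may throw (e.g. re-opening a spill reader) ...
      }
    } finally {
      if (pageToFree != null) {
        freePage(pageToFree) // runs after the lock is released, even if the body above threw
      }
    }
  }

  advanceToNextPage(destructive = true)
}
```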
Closes #23294 from viirya/SPARK-26265-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: Hyukjin Kwon --- .../spark/unsafe/map/BytesToBytesMap.java | 57 ++++++++++--------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index fbba002f1f80..7df8aafb2b67 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -262,36 +262,39 @@ private void advanceToNextPage() { // reference to the page to free and free it after releasing the lock of `MapIterator`. MemoryBlock pageToFree = null; - synchronized (this) { - int nextIdx = dataPages.indexOf(currentPage) + 1; - if (destructive && currentPage != null) { - dataPages.remove(currentPage); - pageToFree = currentPage; - nextIdx --; - } - if (dataPages.size() > nextIdx) { - currentPage = dataPages.get(nextIdx); - pageBaseObject = currentPage.getBaseObject(); - offsetInPage = currentPage.getBaseOffset(); - recordsInPage = UnsafeAlignedOffset.getSize(pageBaseObject, offsetInPage); - offsetInPage += UnsafeAlignedOffset.getUaoSize(); - } else { - currentPage = null; - if (reader != null) { - handleFailedDelete(); + try { + synchronized (this) { + int nextIdx = dataPages.indexOf(currentPage) + 1; + if (destructive && currentPage != null) { + dataPages.remove(currentPage); + pageToFree = currentPage; + nextIdx--; } - try { - Closeables.close(reader, /* swallowIOException = */ false); - reader = spillWriters.getFirst().getReader(serializerManager); - recordsInPage = -1; - } catch (IOException e) { - // Scala iterator does not handle exception - Platform.throwException(e); + if (dataPages.size() > nextIdx) { + currentPage = dataPages.get(nextIdx); + pageBaseObject = currentPage.getBaseObject(); + offsetInPage = currentPage.getBaseOffset(); + recordsInPage = UnsafeAlignedOffset.getSize(pageBaseObject, offsetInPage); + offsetInPage += UnsafeAlignedOffset.getUaoSize(); + } else { + currentPage = null; + if (reader != null) { + handleFailedDelete(); + } + try { + Closeables.close(reader, /* swallowIOException = */ false); + reader = spillWriters.getFirst().getReader(serializerManager); + recordsInPage = -1; + } catch (IOException e) { + // Scala iterator does not handle exception + Platform.throwException(e); + } } } - } - if (pageToFree != null) { - freePage(pageToFree); + } finally { + if (pageToFree != null) { + freePage(pageToFree); + } } } From 9ccae0c9e7d1a0a704e8cd7574ba508419e05e30 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Sat, 15 Dec 2018 13:55:24 +0800 Subject: [PATCH 0139/1072] [SPARK-26362][CORE] Remove 'spark.driver.allowMultipleContexts' to disallow multiple creation of SparkContexts ## What changes were proposed in this pull request? Multiple SparkContexts are discouraged and it has been warning for last 4 years, see SPARK-4180. It could cause arbitrary and mysterious error cases, see SPARK-2243. Honestly, I didn't even know Spark still allows it, which looks never officially supported, see SPARK-2243. I believe It should be good timing now to remove this configuration. ## How was this patch tested? Each doc was manually checked and manually tested: ``` $ ./bin/spark-shell --conf=spark.driver.allowMultipleContexts=true ... 
scala> new SparkContext() org.apache.spark.SparkException: Only one SparkContext should be running in this JVM (see SPARK-2243).The currently running SparkContext was created at: org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:939) ... org.apache.spark.SparkContext$.$anonfun$assertNoOtherContextIsRunning$2(SparkContext.scala:2435) at scala.Option.foreach(Option.scala:274) at org.apache.spark.SparkContext$.assertNoOtherContextIsRunning(SparkContext.scala:2432) at org.apache.spark.SparkContext$.markPartiallyConstructed(SparkContext.scala:2509) at org.apache.spark.SparkContext.(SparkContext.scala:80) at org.apache.spark.SparkContext.(SparkContext.scala:112) ... 49 elided ``` Closes #23311 from HyukjinKwon/SPARK-26362. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .../scala/org/apache/spark/SparkContext.scala | 65 +++++++------------ .../spark/api/java/JavaSparkContext.scala | 4 +- .../org/apache/spark/SparkContextSuite.scala | 19 +----- .../ExternalClusterManagerSuite.scala | 3 +- docs/rdd-programming-guide.md | 2 +- project/MimaExcludes.scala | 4 ++ python/pyspark/context.py | 3 + .../execution/ExchangeCoordinatorSuite.scala | 1 - 8 files changed, 34 insertions(+), 67 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 696dafda6d1e..09cc346db0ed 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -64,9 +64,8 @@ import org.apache.spark.util.logging.DriverLogger * Main entry point for Spark functionality. A SparkContext represents the connection to a Spark * cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster. * - * Only one SparkContext may be active per JVM. You must `stop()` the active SparkContext before - * creating a new one. This limitation may eventually be removed; see SPARK-2243 for more details. - * + * @note Only one `SparkContext` should be active per JVM. You must `stop()` the + * active `SparkContext` before creating a new one. * @param config a Spark Config object describing the application configuration. Any settings in * this config overrides the default configs as well as system properties. */ @@ -75,14 +74,10 @@ class SparkContext(config: SparkConf) extends Logging { // The call site where this SparkContext was constructed. private val creationSite: CallSite = Utils.getCallSite() - // If true, log warnings instead of throwing exceptions when multiple SparkContexts are active - private val allowMultipleContexts: Boolean = - config.getBoolean("spark.driver.allowMultipleContexts", false) - // In order to prevent multiple SparkContexts from being active at the same time, mark this // context as having started construction. // NOTE: this must be placed at the beginning of the SparkContext constructor. - SparkContext.markPartiallyConstructed(this, allowMultipleContexts) + SparkContext.markPartiallyConstructed(this) val startTime = System.currentTimeMillis() @@ -2392,7 +2387,7 @@ class SparkContext(config: SparkConf) extends Logging { // In order to prevent multiple SparkContexts from being active at the same time, mark this // context as having finished construction. // NOTE: this must be placed at the end of the SparkContext constructor. 
- SparkContext.setActiveContext(this, allowMultipleContexts) + SparkContext.setActiveContext(this) } /** @@ -2409,18 +2404,18 @@ object SparkContext extends Logging { private val SPARK_CONTEXT_CONSTRUCTOR_LOCK = new Object() /** - * The active, fully-constructed SparkContext. If no SparkContext is active, then this is `null`. + * The active, fully-constructed SparkContext. If no SparkContext is active, then this is `null`. * - * Access to this field is guarded by SPARK_CONTEXT_CONSTRUCTOR_LOCK. + * Access to this field is guarded by `SPARK_CONTEXT_CONSTRUCTOR_LOCK`. */ private val activeContext: AtomicReference[SparkContext] = new AtomicReference[SparkContext](null) /** - * Points to a partially-constructed SparkContext if some thread is in the SparkContext + * Points to a partially-constructed SparkContext if another thread is in the SparkContext * constructor, or `None` if no SparkContext is being constructed. * - * Access to this field is guarded by SPARK_CONTEXT_CONSTRUCTOR_LOCK + * Access to this field is guarded by `SPARK_CONTEXT_CONSTRUCTOR_LOCK`. */ private var contextBeingConstructed: Option[SparkContext] = None @@ -2428,24 +2423,16 @@ object SparkContext extends Logging { * Called to ensure that no other SparkContext is running in this JVM. * * Throws an exception if a running context is detected and logs a warning if another thread is - * constructing a SparkContext. This warning is necessary because the current locking scheme + * constructing a SparkContext. This warning is necessary because the current locking scheme * prevents us from reliably distinguishing between cases where another context is being * constructed and cases where another constructor threw an exception. */ - private def assertNoOtherContextIsRunning( - sc: SparkContext, - allowMultipleContexts: Boolean): Unit = { + private def assertNoOtherContextIsRunning(sc: SparkContext): Unit = { SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { Option(activeContext.get()).filter(_ ne sc).foreach { ctx => - val errMsg = "Only one SparkContext may be running in this JVM (see SPARK-2243)." + - " To ignore this error, set spark.driver.allowMultipleContexts = true. " + + val errMsg = "Only one SparkContext should be running in this JVM (see SPARK-2243)." + s"The currently running SparkContext was created at:\n${ctx.creationSite.longForm}" - val exception = new SparkException(errMsg) - if (allowMultipleContexts) { - logWarning("Multiple running SparkContexts detected in the same JVM!", exception) - } else { - throw exception - } + throw new SparkException(errMsg) } contextBeingConstructed.filter(_ ne sc).foreach { otherContext => @@ -2454,7 +2441,7 @@ object SparkContext extends Logging { val otherContextCreationSite = Option(otherContext.creationSite).map(_.longForm).getOrElse("unknown location") val warnMsg = "Another SparkContext is being constructed (or threw an exception in its" + - " constructor). This may indicate an error, since only one SparkContext may be" + + " constructor). This may indicate an error, since only one SparkContext should be" + " running in this JVM (see SPARK-2243)." + s" The other SparkContext was created at:\n$otherContextCreationSite" logWarning(warnMsg) @@ -2467,8 +2454,6 @@ object SparkContext extends Logging { * singleton object. Because we can only have one active SparkContext per JVM, * this is useful when applications may wish to share a SparkContext. * - * @note This function cannot be used to create multiple SparkContext instances - * even if multiple contexts are allowed. 
* @param config `SparkConfig` that will be used for initialisation of the `SparkContext` * @return current `SparkContext` (or a new one if it wasn't created before the function call) */ @@ -2477,7 +2462,7 @@ object SparkContext extends Logging { // from assertNoOtherContextIsRunning within setActiveContext SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (activeContext.get() == null) { - setActiveContext(new SparkContext(config), allowMultipleContexts = false) + setActiveContext(new SparkContext(config)) } else { if (config.getAll.nonEmpty) { logWarning("Using an existing SparkContext; some configuration may not take effect.") @@ -2494,14 +2479,12 @@ object SparkContext extends Logging { * * This method allows not passing a SparkConf (useful if just retrieving). * - * @note This function cannot be used to create multiple SparkContext instances - * even if multiple contexts are allowed. * @return current `SparkContext` (or a new one if wasn't created before the function call) */ def getOrCreate(): SparkContext = { SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (activeContext.get() == null) { - setActiveContext(new SparkContext(), allowMultipleContexts = false) + setActiveContext(new SparkContext()) } activeContext.get() } @@ -2516,16 +2499,14 @@ object SparkContext extends Logging { /** * Called at the beginning of the SparkContext constructor to ensure that no SparkContext is - * running. Throws an exception if a running context is detected and logs a warning if another - * thread is constructing a SparkContext. This warning is necessary because the current locking + * running. Throws an exception if a running context is detected and logs a warning if another + * thread is constructing a SparkContext. This warning is necessary because the current locking * scheme prevents us from reliably distinguishing between cases where another context is being * constructed and cases where another constructor threw an exception. */ - private[spark] def markPartiallyConstructed( - sc: SparkContext, - allowMultipleContexts: Boolean): Unit = { + private[spark] def markPartiallyConstructed(sc: SparkContext): Unit = { SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { - assertNoOtherContextIsRunning(sc, allowMultipleContexts) + assertNoOtherContextIsRunning(sc) contextBeingConstructed = Some(sc) } } @@ -2534,18 +2515,16 @@ object SparkContext extends Logging { * Called at the end of the SparkContext constructor to ensure that no other SparkContext has * raced with this constructor and started. */ - private[spark] def setActiveContext( - sc: SparkContext, - allowMultipleContexts: Boolean): Unit = { + private[spark] def setActiveContext(sc: SparkContext): Unit = { SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { - assertNoOtherContextIsRunning(sc, allowMultipleContexts) + assertNoOtherContextIsRunning(sc) contextBeingConstructed = None activeContext.set(sc) } } /** - * Clears the active SparkContext metadata. This is called by `SparkContext#stop()`. It's + * Clears the active SparkContext metadata. This is called by `SparkContext#stop()`. It's * also called in unit tests to prevent a flood of warnings from test suites that don't / can't * properly clean up their SparkContexts. 
*/ diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index 03f259d73e97..2f74d09b3a2b 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -40,8 +40,8 @@ import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, NewHadoopRDD} * A Java-friendly version of [[org.apache.spark.SparkContext]] that returns * [[org.apache.spark.api.java.JavaRDD]]s and works with Java collections instead of Scala ones. * - * Only one SparkContext may be active per JVM. You must `stop()` the active SparkContext before - * creating a new one. This limitation may eventually be removed; see SPARK-2243 for more details. + * @note Only one `SparkContext` should be active per JVM. You must `stop()` the + * active `SparkContext` before creating a new one. */ class JavaSparkContext(val sc: SparkContext) extends Closeable { diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index ec4c7efb5835..66de2f2ac86a 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -44,7 +44,6 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu test("Only one SparkContext may be active at a time") { // Regression test for SPARK-4180 val conf = new SparkConf().setAppName("test").setMaster("local") - .set("spark.driver.allowMultipleContexts", "false") sc = new SparkContext(conf) val envBefore = SparkEnv.get // A SparkContext is already running, so we shouldn't be able to create a second one @@ -58,7 +57,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } test("Can still construct a new SparkContext after failing to construct a previous one") { - val conf = new SparkConf().set("spark.driver.allowMultipleContexts", "false") + val conf = new SparkConf() // This is an invalid configuration (no app name or master URL) intercept[SparkException] { new SparkContext(conf) @@ -67,18 +66,6 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu sc = new SparkContext(conf.setMaster("local").setAppName("test")) } - test("Check for multiple SparkContexts can be disabled via undocumented debug option") { - var secondSparkContext: SparkContext = null - try { - val conf = new SparkConf().setAppName("test").setMaster("local") - .set("spark.driver.allowMultipleContexts", "true") - sc = new SparkContext(conf) - secondSparkContext = new SparkContext(conf) - } finally { - Option(secondSparkContext).foreach(_.stop()) - } - } - test("Test getOrCreate") { var sc2: SparkContext = null SparkContext.clearActiveContext() @@ -92,10 +79,6 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu assert(sc === sc2) assert(sc eq sc2) - // Try creating second context to confirm that it's still possible, if desired - sc2 = new SparkContext(new SparkConf().setAppName("test3").setMaster("local") - .set("spark.driver.allowMultipleContexts", "true")) - sc2.stop() } diff --git a/core/src/test/scala/org/apache/spark/scheduler/ExternalClusterManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/ExternalClusterManagerSuite.scala index 0621c98d4118..30d0966691a3 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/ExternalClusterManagerSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/scheduler/ExternalClusterManagerSuite.scala @@ -25,8 +25,7 @@ import org.apache.spark.util.AccumulatorV2 class ExternalClusterManagerSuite extends SparkFunSuite with LocalSparkContext { test("launch of backend and scheduler") { - val conf = new SparkConf().setMaster("myclusterManager"). - setAppName("testcm").set("spark.driver.allowMultipleContexts", "true") + val conf = new SparkConf().setMaster("myclusterManager").setAppName("testcm") sc = new SparkContext(conf) // check if the scheduler components are created and initialized sc.schedulerBackend match { diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index 2d1ddae5780d..308a8ea65390 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -138,7 +138,7 @@ The first thing a Spark program must do is to create a [SparkContext](api/scala/ how to access a cluster. To create a `SparkContext` you first need to build a [SparkConf](api/scala/index.html#org.apache.spark.SparkConf) object that contains information about your application. -Only one SparkContext may be active per JVM. You must `stop()` the active SparkContext before creating a new one. +Only one SparkContext should be active per JVM. You must `stop()` the active SparkContext before creating a new one. {% highlight scala %} val conf = new SparkConf().setAppName(appName).setMaster(master) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 883913332ca1..7bb70a29195d 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -220,6 +220,10 @@ object MimaExcludes { // [SPARK-26139] Implement shuffle write metrics in SQL ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ShuffleDependency.this"), + // [SPARK-26362][CORE] Remove 'spark.driver.allowMultipleContexts' to disallow multiple creation of SparkContexts + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkContext.setActiveContext"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkContext.markPartiallyConstructed"), + // Data Source V2 API changes (problem: Problem) => problem match { case MissingClassProblem(cls) => diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 1180bf91baa5..6137ed25a0dd 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -63,6 +63,9 @@ class SparkContext(object): Main entry point for Spark functionality. A SparkContext represents the connection to a Spark cluster, and can be used to create L{RDD} and broadcast variables on that cluster. + + .. note:: Only one :class:`SparkContext` should be active per JVM. You must `stop()` + the active :class:`SparkContext` before creating a new one. 
""" _gateway = None diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala index 6ad025f37e44..4a439940beb7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala @@ -263,7 +263,6 @@ class ExchangeCoordinatorSuite extends SparkFunSuite with BeforeAndAfterAll { .setMaster("local[*]") .setAppName("test") .set("spark.ui.enabled", "false") - .set("spark.driver.allowMultipleContexts", "true") .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true") .set(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1") From 860f4497f2a59b21d455ec8bfad9ae15d2fd4d2e Mon Sep 17 00:00:00 2001 From: Jing Chen He Date: Sat, 15 Dec 2018 08:41:16 -0600 Subject: [PATCH 0140/1072] [SPARK-26315][PYSPARK] auto cast threshold from Integer to Float in approxSimilarityJoin of BucketedRandomProjectionLSHModel ## What changes were proposed in this pull request? If the input parameter 'threshold' to the function approxSimilarityJoin is not a float, we would get an exception. The fix is to convert the 'threshold' into a float before calling the java implementation method. ## How was this patch tested? Added a new test case. Without this fix, the test will throw an exception as reported in the JIRA. With the fix, the test passes. Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23313 from jerryjch/SPARK-26315. Authored-by: Jing Chen He Signed-off-by: Sean Owen --- python/pyspark/ml/feature.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index c9507c20918e..08ae58246adb 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -192,6 +192,7 @@ def approxSimilarityJoin(self, datasetA, datasetB, threshold, distCol="distCol") "datasetA" and "datasetB", and a column "distCol" is added to show the distance between each pair. """ + threshold = TypeConverters.toFloat(threshold) return self._call_java("approxSimilarityJoin", datasetA, datasetB, threshold, distCol) @@ -239,6 +240,16 @@ class BucketedRandomProjectionLSH(JavaEstimator, LSHParams, HasInputCol, HasOutp | 3| 6| 2.23606797749979| +---+---+-----------------+ ... + >>> model.approxSimilarityJoin(df, df2, 3, distCol="EuclideanDistance").select( + ... col("datasetA.id").alias("idA"), + ... col("datasetB.id").alias("idB"), + ... col("EuclideanDistance")).show() + +---+---+-----------------+ + |idA|idB|EuclideanDistance| + +---+---+-----------------+ + | 3| 6| 2.23606797749979| + +---+---+-----------------+ + ... >>> brpPath = temp_path + "/brp" >>> brp.save(brpPath) >>> brp2 = BucketedRandomProjectionLSH.load(brpPath) From 8a27952cdbf492939d9bda59e2f516f574581636 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 16 Dec 2018 09:32:13 +0800 Subject: [PATCH 0141/1072] [SPARK-26243][SQL] Use java.time API for parsing timestamps and dates from JSON ## What changes were proposed in this pull request? In the PR, I propose to switch on **java.time API** for parsing timestamps and dates from JSON inputs with microseconds precision. The SQL config `spark.sql.legacy.timeParser.enabled` allow to switch back to previous behavior with using `java.text.SimpleDateFormat`/`FastDateFormat` for parsing/generating timestamps/dates. 
## How was this patch tested? It was tested by `JsonExpressionsSuite`, `JsonFunctionsSuite` and `JsonSuite`. Closes #23196 from MaxGekk/json-time-parser. Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Wenchen Fan --- docs/sql-migration-guide-upgrade.md | 2 +- .../sql/catalyst/csv/CSVInferSchema.scala | 6 +- .../sql/catalyst/csv/UnivocityGenerator.scala | 8 +- .../sql/catalyst/csv/UnivocityParser.scala | 6 +- .../spark/sql/catalyst/json/JSONOptions.scala | 10 +- .../sql/catalyst/json/JacksonGenerator.scala | 14 +- .../sql/catalyst/json/JacksonParser.scala | 35 +-- ...rmatter.scala => TimestampFormatter.scala} | 93 ++++---- .../sql/util/DateTimeFormatterSuite.scala | 103 --------- .../util/DateTimestampFormatterSuite.scala | 174 +++++++++++++++ .../datasources/json/JsonSuite.scala | 201 ++++++++++-------- .../sql/sources/HadoopFsRelationTest.scala | 105 ++++----- 12 files changed, 422 insertions(+), 335 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/{DateTimeFormatter.scala => TimestampFormatter.scala} (63%) delete mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimeFormatterSuite.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimestampFormatterSuite.scala diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 8834e8991d8c..115fc6516fb4 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -35,7 +35,7 @@ displayTitle: Spark SQL Upgrading Guide - Spark applications which are built with Spark version 2.4 and prior, and call methods of `UserDefinedFunction`, need to be re-compiled with Spark 3.0, as they are not binary compatible with Spark 3.0. - - Since Spark 3.0, CSV datasource uses java.time API for parsing and generating CSV content. New formatting implementation supports date/timestamp patterns conformed to ISO 8601. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. + - Since Spark 3.0, CSV/JSON datasources use java.time API for parsing and generating CSV/JSON content. In Spark version 2.4 and earlier, java.text.SimpleDateFormat is used for the same purpuse with fallbacks to the parsing mechanisms of Spark 2.0 and 1.x. For example, `2018-12-08 10:39:21.123` with the pattern `yyyy-MM-dd'T'HH:mm:ss.SSS` cannot be parsed since Spark 3.0 because the timestamp does not match to the pattern but it can be parsed by earlier Spark versions due to a fallback to `Timestamp.valueOf`. To parse the same timestamp since Spark 3.0, the pattern should be `yyyy-MM-dd HH:mm:ss.SSS`. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. - In Spark version 2.4 and earlier, CSV datasource converts a malformed CSV string to a row with all `null`s in the PERMISSIVE mode. Since Spark 3.0, the returned row can contain non-`null` fields if some of CSV column values were parsed and converted to desired types successfully. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 345dc4d41993..35ade136cc60 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -22,13 +22,13 @@ import scala.util.control.Exception.allCatch import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.analysis.TypeCoercion import org.apache.spark.sql.catalyst.expressions.ExprUtils -import org.apache.spark.sql.catalyst.util.DateTimeFormatter +import org.apache.spark.sql.catalyst.util.TimestampFormatter import org.apache.spark.sql.types._ class CSVInferSchema(val options: CSVOptions) extends Serializable { @transient - private lazy val timeParser = DateTimeFormatter( + private lazy val timestampParser = TimestampFormatter( options.timestampFormat, options.timeZone, options.locale) @@ -160,7 +160,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { private def tryParseTimestamp(field: String): DataType = { // This case infers a custom `dataFormat` is set. - if ((allCatch opt timeParser.parse(field)).isDefined) { + if ((allCatch opt timestampParser.parse(field)).isDefined) { TimestampType } else { tryParseBoolean(field) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala index af09cd6c8449..f012d96138f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala @@ -22,7 +22,7 @@ import java.io.Writer import com.univocity.parsers.csv.CsvWriter import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeFormatter} +import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter} import org.apache.spark.sql.types._ class UnivocityGenerator( @@ -41,18 +41,18 @@ class UnivocityGenerator( private val valueConverters: Array[ValueConverter] = schema.map(_.dataType).map(makeConverter).toArray - private val timeFormatter = DateTimeFormatter( + private val timestampFormatter = TimestampFormatter( options.timestampFormat, options.timeZone, options.locale) - private val dateFormatter = DateFormatter(options.dateFormat, options.timeZone, options.locale) + private val dateFormatter = DateFormatter(options.dateFormat, options.locale) private def makeConverter(dataType: DataType): ValueConverter = dataType match { case DateType => (row: InternalRow, ordinal: Int) => dateFormatter.format(row.getInt(ordinal)) case TimestampType => - (row: InternalRow, ordinal: Int) => timeFormatter.format(row.getLong(ordinal)) + (row: InternalRow, ordinal: Int) => timestampFormatter.format(row.getLong(ordinal)) case udt: UserDefinedType[_] => makeConverter(udt.sqlType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index 0f375e036029..ed089120055e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -74,11 +74,11 @@ class UnivocityParser( private val row = new 
GenericInternalRow(requiredSchema.length) - private val timeFormatter = DateTimeFormatter( + private val timestampFormatter = TimestampFormatter( options.timestampFormat, options.timeZone, options.locale) - private val dateFormatter = DateFormatter(options.dateFormat, options.timeZone, options.locale) + private val dateFormatter = DateFormatter(options.dateFormat, options.locale) // Retrieve the raw record string. private def getCurrentInput: UTF8String = { @@ -158,7 +158,7 @@ class UnivocityParser( } case _: TimestampType => (d: String) => - nullSafeDatum(d, name, nullable, options)(timeFormatter.parse) + nullSafeDatum(d, name, nullable, options)(timestampFormatter.parse) case _: DateType => (d: String) => nullSafeDatum(d, name, nullable, options)(dateFormatter.parse) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index e10b8a327c01..eaff3fa7bec2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -21,7 +21,6 @@ import java.nio.charset.{Charset, StandardCharsets} import java.util.{Locale, TimeZone} import com.fasterxml.jackson.core.{JsonFactory, JsonParser} -import org.apache.commons.lang3.time.FastDateFormat import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.util._ @@ -82,13 +81,10 @@ private[sql] class JSONOptions( val timeZone: TimeZone = DateTimeUtils.getTimeZone( parameters.getOrElse(DateTimeUtils.TIMEZONE_OPTION, defaultTimeZoneId)) - // Uses `FastDateFormat` which can be direct replacement for `SimpleDateFormat` and thread-safe. - val dateFormat: FastDateFormat = - FastDateFormat.getInstance(parameters.getOrElse("dateFormat", "yyyy-MM-dd"), locale) + val dateFormat: String = parameters.getOrElse("dateFormat", "yyyy-MM-dd") - val timestampFormat: FastDateFormat = - FastDateFormat.getInstance( - parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), timeZone, locale) + val timestampFormat: String = + parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSXXX") val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala index d02a2be8ddad..951f5190cd50 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala @@ -23,7 +23,7 @@ import com.fasterxml.jackson.core._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.SpecializedGetters -import org.apache.spark.sql.catalyst.util.{ArrayData, DateTimeUtils, MapData} +import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.types._ /** @@ -77,6 +77,12 @@ private[sql] class JacksonGenerator( private val lineSeparator: String = options.lineSeparatorInWrite + private val timestampFormatter = TimestampFormatter( + options.timestampFormat, + options.timeZone, + options.locale) + private val dateFormatter = DateFormatter(options.dateFormat, options.locale) + private def makeWriter(dataType: DataType): ValueWriter = dataType match { case NullType => (row: SpecializedGetters, ordinal: Int) => @@ -116,14 +122,12 @@ private[sql] class 
JacksonGenerator( case TimestampType => (row: SpecializedGetters, ordinal: Int) => - val timestampString = - options.timestampFormat.format(DateTimeUtils.toJavaTimestamp(row.getLong(ordinal))) + val timestampString = timestampFormatter.format(row.getLong(ordinal)) gen.writeString(timestampString) case DateType => (row: SpecializedGetters, ordinal: Int) => - val dateString = - options.dateFormat.format(DateTimeUtils.toJavaDate(row.getInt(ordinal))) + val dateString = dateFormatter.format(row.getInt(ordinal)) gen.writeString(dateString) case BinaryType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 7e3bd4df51bb..3f245e1400fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -55,6 +55,12 @@ class JacksonParser( private val factory = new JsonFactory() options.setJacksonOptions(factory) + private val timestampFormatter = TimestampFormatter( + options.timestampFormat, + options.timeZone, + options.locale) + private val dateFormatter = DateFormatter(options.dateFormat, options.locale) + /** * Create a converter which converts the JSON documents held by the `JsonParser` * to a value according to a desired schema. This is a wrapper for the method @@ -218,17 +224,7 @@ class JacksonParser( case TimestampType => (parser: JsonParser) => parseJsonToken[java.lang.Long](parser, dataType) { case VALUE_STRING if parser.getTextLength >= 1 => - val stringValue = parser.getText - // This one will lose microseconds parts. - // See https://issues.apache.org/jira/browse/SPARK-10681. - Long.box { - Try(options.timestampFormat.parse(stringValue).getTime * 1000L) - .getOrElse { - // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards - // compatibility. - DateTimeUtils.stringToTime(stringValue).getTime * 1000L - } - } + timestampFormatter.parse(parser.getText) case VALUE_NUMBER_INT => parser.getLongValue * 1000000L @@ -237,22 +233,7 @@ class JacksonParser( case DateType => (parser: JsonParser) => parseJsonToken[java.lang.Integer](parser, dataType) { case VALUE_STRING if parser.getTextLength >= 1 => - val stringValue = parser.getText - // This one will lose microseconds parts. - // See https://issues.apache.org/jira/browse/SPARK-10681.x - Int.box { - Try(DateTimeUtils.millisToDays(options.dateFormat.parse(stringValue).getTime)) - .orElse { - // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards - // compatibility. - Try(DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(stringValue).getTime)) - } - .getOrElse { - // In Spark 1.5.0, we store the data as number of days since epoch in string. - // So, we just convert it to Int. 
- stringValue.toInt - } - } + dateFormatter.parse(parser.getText) } case BinaryType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala similarity index 63% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatter.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index ad1f4131de2f..2b8d22dde926 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util import java.time._ import java.time.format.DateTimeFormatterBuilder -import java.time.temporal.{ChronoField, TemporalQueries} +import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} import java.util.{Locale, TimeZone} import scala.util.Try @@ -28,31 +28,44 @@ import org.apache.commons.lang3.time.FastDateFormat import org.apache.spark.sql.internal.SQLConf -sealed trait DateTimeFormatter { +sealed trait TimestampFormatter { def parse(s: String): Long // returns microseconds since epoch def format(us: Long): String } -class Iso8601DateTimeFormatter( +trait FormatterUtils { + protected def zoneId: ZoneId + protected def buildFormatter( + pattern: String, + locale: Locale): java.time.format.DateTimeFormatter = { + new DateTimeFormatterBuilder() + .appendPattern(pattern) + .parseDefaulting(ChronoField.YEAR_OF_ERA, 1970) + .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) + .parseDefaulting(ChronoField.DAY_OF_MONTH, 1) + .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) + .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) + .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) + .toFormatter(locale) + } + protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor): java.time.Instant = { + val localDateTime = LocalDateTime.from(temporalAccessor) + val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId) + Instant.from(zonedDateTime) + } +} + +class Iso8601TimestampFormatter( pattern: String, timeZone: TimeZone, - locale: Locale) extends DateTimeFormatter { - val formatter = new DateTimeFormatterBuilder() - .appendPattern(pattern) - .parseDefaulting(ChronoField.YEAR_OF_ERA, 1970) - .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) - .parseDefaulting(ChronoField.DAY_OF_MONTH, 1) - .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) - .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) - .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) - .toFormatter(locale) + locale: Locale) extends TimestampFormatter with FormatterUtils { + val zoneId = timeZone.toZoneId + val formatter = buildFormatter(pattern, locale) def toInstant(s: String): Instant = { val temporalAccessor = formatter.parse(s) if (temporalAccessor.query(TemporalQueries.offset()) == null) { - val localDateTime = LocalDateTime.from(temporalAccessor) - val zonedDateTime = ZonedDateTime.of(localDateTime, timeZone.toZoneId) - Instant.from(zonedDateTime) + toInstantWithZoneId(temporalAccessor) } else { Instant.from(temporalAccessor) } @@ -75,10 +88,10 @@ class Iso8601DateTimeFormatter( } } -class LegacyDateTimeFormatter( +class LegacyTimestampFormatter( pattern: String, timeZone: TimeZone, - locale: Locale) extends DateTimeFormatter { + locale: Locale) extends TimestampFormatter { val format = FastDateFormat.getInstance(pattern, timeZone, locale) protected def toMillis(s: 
String): Long = format.parse(s).getTime @@ -90,21 +103,21 @@ class LegacyDateTimeFormatter( } } -class LegacyFallbackDateTimeFormatter( +class LegacyFallbackTimestampFormatter( pattern: String, timeZone: TimeZone, - locale: Locale) extends LegacyDateTimeFormatter(pattern, timeZone, locale) { + locale: Locale) extends LegacyTimestampFormatter(pattern, timeZone, locale) { override def toMillis(s: String): Long = { Try {super.toMillis(s)}.getOrElse(DateTimeUtils.stringToTime(s).getTime) } } -object DateTimeFormatter { - def apply(format: String, timeZone: TimeZone, locale: Locale): DateTimeFormatter = { +object TimestampFormatter { + def apply(format: String, timeZone: TimeZone, locale: Locale): TimestampFormatter = { if (SQLConf.get.legacyTimeParserEnabled) { - new LegacyFallbackDateTimeFormatter(format, timeZone, locale) + new LegacyFallbackTimestampFormatter(format, timeZone, locale) } else { - new Iso8601DateTimeFormatter(format, timeZone, locale) + new Iso8601TimestampFormatter(format, timeZone, locale) } } } @@ -116,13 +129,19 @@ sealed trait DateFormatter { class Iso8601DateFormatter( pattern: String, - timeZone: TimeZone, - locale: Locale) extends DateFormatter { + locale: Locale) extends DateFormatter with FormatterUtils { + + val zoneId = ZoneId.of("UTC") + + val formatter = buildFormatter(pattern, locale) - val dateTimeFormatter = new Iso8601DateTimeFormatter(pattern, timeZone, locale) + def toInstant(s: String): Instant = { + val temporalAccessor = formatter.parse(s) + toInstantWithZoneId(temporalAccessor) + } override def parse(s: String): Int = { - val seconds = dateTimeFormatter.toInstant(s).getEpochSecond + val seconds = toInstant(s).getEpochSecond val days = Math.floorDiv(seconds, DateTimeUtils.SECONDS_PER_DAY) days.toInt @@ -130,15 +149,12 @@ class Iso8601DateFormatter( override def format(days: Int): String = { val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY) - dateTimeFormatter.formatter.withZone(timeZone.toZoneId).format(instant) + formatter.withZone(zoneId).format(instant) } } -class LegacyDateFormatter( - pattern: String, - timeZone: TimeZone, - locale: Locale) extends DateFormatter { - val format = FastDateFormat.getInstance(pattern, timeZone, locale) +class LegacyDateFormatter(pattern: String, locale: Locale) extends DateFormatter { + val format = FastDateFormat.getInstance(pattern, locale) def parse(s: String): Int = { val milliseconds = format.parse(s).getTime @@ -153,8 +169,7 @@ class LegacyDateFormatter( class LegacyFallbackDateFormatter( pattern: String, - timeZone: TimeZone, - locale: Locale) extends LegacyDateFormatter(pattern, timeZone, locale) { + locale: Locale) extends LegacyDateFormatter(pattern, locale) { override def parse(s: String): Int = { Try(super.parse(s)).orElse { // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards @@ -169,11 +184,11 @@ class LegacyFallbackDateFormatter( } object DateFormatter { - def apply(format: String, timeZone: TimeZone, locale: Locale): DateFormatter = { + def apply(format: String, locale: Locale): DateFormatter = { if (SQLConf.get.legacyTimeParserEnabled) { - new LegacyFallbackDateFormatter(format, timeZone, locale) + new LegacyFallbackDateFormatter(format, locale) } else { - new Iso8601DateFormatter(format, timeZone, locale) + new Iso8601DateFormatter(format, locale) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimeFormatterSuite.scala deleted file mode 
100644 index 02d4ee049060..000000000000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimeFormatterSuite.scala +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.util - -import java.util.{Locale, TimeZone} - -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeFormatter, DateTimeTestUtils} - -class DateTimeFormatterSuite extends SparkFunSuite { - test("parsing dates using time zones") { - val localDate = "2018-12-02" - val expectedDays = Map( - "UTC" -> 17867, - "PST" -> 17867, - "CET" -> 17866, - "Africa/Dakar" -> 17867, - "America/Los_Angeles" -> 17867, - "Antarctica/Vostok" -> 17866, - "Asia/Hong_Kong" -> 17866, - "Europe/Amsterdam" -> 17866) - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => - val formatter = DateFormatter("yyyy-MM-dd", TimeZone.getTimeZone(timeZone), Locale.US) - val daysSinceEpoch = formatter.parse(localDate) - assert(daysSinceEpoch === expectedDays(timeZone)) - } - } - - test("parsing timestamps using time zones") { - val localDate = "2018-12-02T10:11:12.001234" - val expectedMicros = Map( - "UTC" -> 1543745472001234L, - "PST" -> 1543774272001234L, - "CET" -> 1543741872001234L, - "Africa/Dakar" -> 1543745472001234L, - "America/Los_Angeles" -> 1543774272001234L, - "Antarctica/Vostok" -> 1543723872001234L, - "Asia/Hong_Kong" -> 1543716672001234L, - "Europe/Amsterdam" -> 1543741872001234L) - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => - val formatter = DateTimeFormatter( - "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", - TimeZone.getTimeZone(timeZone), - Locale.US) - val microsSinceEpoch = formatter.parse(localDate) - assert(microsSinceEpoch === expectedMicros(timeZone)) - } - } - - test("format dates using time zones") { - val daysSinceEpoch = 17867 - val expectedDate = Map( - "UTC" -> "2018-12-02", - "PST" -> "2018-12-01", - "CET" -> "2018-12-02", - "Africa/Dakar" -> "2018-12-02", - "America/Los_Angeles" -> "2018-12-01", - "Antarctica/Vostok" -> "2018-12-02", - "Asia/Hong_Kong" -> "2018-12-02", - "Europe/Amsterdam" -> "2018-12-02") - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => - val formatter = DateFormatter("yyyy-MM-dd", TimeZone.getTimeZone(timeZone), Locale.US) - val date = formatter.format(daysSinceEpoch) - assert(date === expectedDate(timeZone)) - } - } - - test("format timestamps using time zones") { - val microsSinceEpoch = 1543745472001234L - val expectedTimestamp = Map( - "UTC" -> "2018-12-02T10:11:12.001234", - "PST" -> "2018-12-02T02:11:12.001234", - "CET" -> "2018-12-02T11:11:12.001234", - "Africa/Dakar" -> "2018-12-02T10:11:12.001234", - "America/Los_Angeles" -> "2018-12-02T02:11:12.001234", - "Antarctica/Vostok" -> 
"2018-12-02T16:11:12.001234", - "Asia/Hong_Kong" -> "2018-12-02T18:11:12.001234", - "Europe/Amsterdam" -> "2018-12-02T11:11:12.001234") - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => - val formatter = DateTimeFormatter( - "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", - TimeZone.getTimeZone(timeZone), - Locale.US) - val timestamp = formatter.format(microsSinceEpoch) - assert(timestamp === expectedTimestamp(timeZone)) - } - } -} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimestampFormatterSuite.scala new file mode 100644 index 000000000000..43e348c7eebf --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimestampFormatterSuite.scala @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.util + +import java.util.{Locale, TimeZone} + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.internal.SQLConf + +class DateTimestampFormatterSuite extends SparkFunSuite with SQLHelper { + test("parsing dates") { + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + val formatter = DateFormatter("yyyy-MM-dd", Locale.US) + val daysSinceEpoch = formatter.parse("2018-12-02") + assert(daysSinceEpoch === 17867) + } + } + } + + test("parsing timestamps using time zones") { + val localDate = "2018-12-02T10:11:12.001234" + val expectedMicros = Map( + "UTC" -> 1543745472001234L, + "PST" -> 1543774272001234L, + "CET" -> 1543741872001234L, + "Africa/Dakar" -> 1543745472001234L, + "America/Los_Angeles" -> 1543774272001234L, + "Antarctica/Vostok" -> 1543723872001234L, + "Asia/Hong_Kong" -> 1543716672001234L, + "Europe/Amsterdam" -> 1543741872001234L) + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + val formatter = TimestampFormatter( + "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", + TimeZone.getTimeZone(timeZone), + Locale.US) + val microsSinceEpoch = formatter.parse(localDate) + assert(microsSinceEpoch === expectedMicros(timeZone)) + } + } + + test("format dates") { + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + val formatter = DateFormatter("yyyy-MM-dd", Locale.US) + val date = formatter.format(17867) + assert(date === "2018-12-02") + } + } + } + + test("format timestamps using time zones") { + val microsSinceEpoch = 1543745472001234L + val expectedTimestamp = Map( + "UTC" -> "2018-12-02T10:11:12.001234", + "PST" -> "2018-12-02T02:11:12.001234", + "CET" -> "2018-12-02T11:11:12.001234", + 
"Africa/Dakar" -> "2018-12-02T10:11:12.001234", + "America/Los_Angeles" -> "2018-12-02T02:11:12.001234", + "Antarctica/Vostok" -> "2018-12-02T16:11:12.001234", + "Asia/Hong_Kong" -> "2018-12-02T18:11:12.001234", + "Europe/Amsterdam" -> "2018-12-02T11:11:12.001234") + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + val formatter = TimestampFormatter( + "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", + TimeZone.getTimeZone(timeZone), + Locale.US) + val timestamp = formatter.format(microsSinceEpoch) + assert(timestamp === expectedTimestamp(timeZone)) + } + } + + test("roundtrip timestamp -> micros -> timestamp using timezones") { + Seq( + -58710115316212000L, + -18926315945345679L, + -9463427405253013L, + -244000001L, + 0L, + 99628200102030L, + 1543749753123456L, + 2177456523456789L, + 11858049903010203L).foreach { micros => + DateTimeTestUtils.outstandingTimezones.foreach { timeZone => + val formatter = TimestampFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSSSS", timeZone, Locale.US) + val timestamp = formatter.format(micros) + val parsed = formatter.parse(timestamp) + assert(micros === parsed) + } + } + } + + test("roundtrip micros -> timestamp -> micros using timezones") { + Seq( + "0109-07-20T18:38:03.788000", + "1370-04-01T10:00:54.654321", + "1670-02-11T14:09:54.746987", + "1969-12-31T23:55:55.999999", + "1970-01-01T00:00:00.000000", + "1973-02-27T02:30:00.102030", + "2018-12-02T11:22:33.123456", + "2039-01-01T01:02:03.456789", + "2345-10-07T22:45:03.010203").foreach { timestamp => + DateTimeTestUtils.outstandingTimezones.foreach { timeZone => + val formatter = TimestampFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSSSS", timeZone, Locale.US) + val micros = formatter.parse(timestamp) + val formatted = formatter.format(micros) + assert(timestamp === formatted) + } + } + } + + test("roundtrip date -> days -> date") { + Seq( + "0050-01-01", + "0953-02-02", + "1423-03-08", + "1969-12-31", + "1972-08-25", + "1975-09-26", + "2018-12-12", + "2038-01-01", + "5010-11-17").foreach { date => + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + val formatter = DateFormatter("yyyy-MM-dd", Locale.US) + val days = formatter.parse(date) + val formatted = formatter.format(days) + assert(date === formatted) + } + } + } + } + + test("roundtrip days -> date -> days") { + Seq( + -701265, + -371419, + -199722, + -1, + 0, + 967, + 2094, + 17877, + 24837, + 1110657).foreach { days => + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + val formatter = DateFormatter("yyyy-MM-dd", Locale.US) + val date = formatter.format(days) + val parsed = formatter.parse(date) + assert(days === parsed) + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 3330de3584eb..786335b42e3c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -57,14 +57,17 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { } val factory = new JsonFactory() - def enforceCorrectType(value: Any, dataType: DataType): Any = { + def enforceCorrectType( + value: Any, + dataType: DataType, + options: Map[String, String] = Map.empty): Any = { val writer = new StringWriter() 
Utils.tryWithResource(factory.createGenerator(writer)) { generator => generator.writeObject(value) generator.flush() } - val dummyOption = new JSONOptions(Map.empty[String, String], "GMT") + val dummyOption = new JSONOptions(options, SQLConf.get.sessionLocalTimeZone) val dummySchema = StructType(Seq.empty) val parser = new JacksonParser(dummySchema, dummyOption, allowArrayAsStructs = true) @@ -96,19 +99,27 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { checkTypePromotion(DateTimeUtils.fromJavaTimestamp(new Timestamp(intNumber.toLong * 1000L)), enforceCorrectType(intNumber.toLong, TimestampType)) val strTime = "2014-09-30 12:34:56" - checkTypePromotion(DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf(strTime)), - enforceCorrectType(strTime, TimestampType)) + checkTypePromotion( + expected = DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf(strTime)), + enforceCorrectType(strTime, TimestampType, + Map("timestampFormat" -> "yyyy-MM-dd HH:mm:ss"))) val strDate = "2014-10-15" checkTypePromotion( DateTimeUtils.fromJavaDate(Date.valueOf(strDate)), enforceCorrectType(strDate, DateType)) val ISO8601Time1 = "1970-01-01T01:00:01.0Z" - val ISO8601Time2 = "1970-01-01T02:00:01-01:00" checkTypePromotion(DateTimeUtils.fromJavaTimestamp(new Timestamp(3601000)), - enforceCorrectType(ISO8601Time1, TimestampType)) + enforceCorrectType( + ISO8601Time1, + TimestampType, + Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss.SX"))) + val ISO8601Time2 = "1970-01-01T02:00:01-01:00" checkTypePromotion(DateTimeUtils.fromJavaTimestamp(new Timestamp(10801000)), - enforceCorrectType(ISO8601Time2, TimestampType)) + enforceCorrectType( + ISO8601Time2, + TimestampType, + Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ssXXX"))) val ISO8601Date = "1970-01-01" checkTypePromotion(DateTimeUtils.millisToDays(32400000), @@ -1440,103 +1451,105 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { } test("backward compatibility") { - // This test we make sure our JSON support can read JSON data generated by previous version - // of Spark generated through toJSON method and JSON data source. - // The data is generated by the following program. - // Here are a few notes: - // - Spark 1.5.0 cannot save timestamp data. So, we manually added timestamp field (col13) - // in the JSON object. - // - For Spark before 1.5.1, we do not generate UDTs. So, we manually added the UDT value to - // JSON objects generated by those Spark versions (col17). - // - If the type is NullType, we do not write data out. - - // Create the schema. - val struct = - StructType( - StructField("f1", FloatType, true) :: - StructField("f2", ArrayType(BooleanType), true) :: Nil) + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> "true") { + // This test we make sure our JSON support can read JSON data generated by previous version + // of Spark generated through toJSON method and JSON data source. + // The data is generated by the following program. + // Here are a few notes: + // - Spark 1.5.0 cannot save timestamp data. So, we manually added timestamp field (col13) + // in the JSON object. + // - For Spark before 1.5.1, we do not generate UDTs. So, we manually added the UDT value to + // JSON objects generated by those Spark versions (col17). + // - If the type is NullType, we do not write data out. + + // Create the schema. 
+ val struct = + StructType( + StructField("f1", FloatType, true) :: + StructField("f2", ArrayType(BooleanType), true) :: Nil) - val dataTypes = - Seq( - StringType, BinaryType, NullType, BooleanType, - ByteType, ShortType, IntegerType, LongType, - FloatType, DoubleType, DecimalType(25, 5), DecimalType(6, 5), - DateType, TimestampType, - ArrayType(IntegerType), MapType(StringType, LongType), struct, - new TestUDT.MyDenseVectorUDT()) - val fields = dataTypes.zipWithIndex.map { case (dataType, index) => - StructField(s"col$index", dataType, nullable = true) - } - val schema = StructType(fields) + val dataTypes = + Seq( + StringType, BinaryType, NullType, BooleanType, + ByteType, ShortType, IntegerType, LongType, + FloatType, DoubleType, DecimalType(25, 5), DecimalType(6, 5), + DateType, TimestampType, + ArrayType(IntegerType), MapType(StringType, LongType), struct, + new TestUDT.MyDenseVectorUDT()) + val fields = dataTypes.zipWithIndex.map { case (dataType, index) => + StructField(s"col$index", dataType, nullable = true) + } + val schema = StructType(fields) - val constantValues = - Seq( - "a string in binary".getBytes(StandardCharsets.UTF_8), - null, - true, - 1.toByte, - 2.toShort, - 3, - Long.MaxValue, - 0.25.toFloat, - 0.75, - new java.math.BigDecimal(s"1234.23456"), - new java.math.BigDecimal(s"1.23456"), - java.sql.Date.valueOf("2015-01-01"), - java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123"), - Seq(2, 3, 4), - Map("a string" -> 2000L), - Row(4.75.toFloat, Seq(false, true)), - new TestUDT.MyDenseVector(Array(0.25, 2.25, 4.25))) - val data = - Row.fromSeq(Seq("Spark " + spark.sparkContext.version) ++ constantValues) :: Nil + val constantValues = + Seq( + "a string in binary".getBytes(StandardCharsets.UTF_8), + null, + true, + 1.toByte, + 2.toShort, + 3, + Long.MaxValue, + 0.25.toFloat, + 0.75, + new java.math.BigDecimal(s"1234.23456"), + new java.math.BigDecimal(s"1.23456"), + java.sql.Date.valueOf("2015-01-01"), + java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123"), + Seq(2, 3, 4), + Map("a string" -> 2000L), + Row(4.75.toFloat, Seq(false, true)), + new TestUDT.MyDenseVector(Array(0.25, 2.25, 4.25))) + val data = + Row.fromSeq(Seq("Spark " + spark.sparkContext.version) ++ constantValues) :: Nil - // Data generated by previous versions. - // scalastyle:off - val existingJSONData = + // Data generated by previous versions. 
+ // scalastyle:off + val existingJSONData = """{"col0":"Spark 1.2.2","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: - """{"col0":"Spark 1.3.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: - """{"col0":"Spark 1.3.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: - """{"col0":"Spark 1.4.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: - """{"col0":"Spark 1.4.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: - """{"col0":"Spark 1.5.0","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: - """{"col0":"Spark 1.5.0","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"16436","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: Nil - // scalastyle:on - - // Generate data for the current version. 
- val df = spark.createDataFrame(spark.sparkContext.parallelize(data, 1), schema) - withTempPath { path => - df.write.format("json").mode("overwrite").save(path.getCanonicalPath) + """{"col0":"Spark 1.3.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: + """{"col0":"Spark 1.3.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: + """{"col0":"Spark 1.4.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: + """{"col0":"Spark 1.4.1","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: + """{"col0":"Spark 1.5.0","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"2015-01-01","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: + """{"col0":"Spark 1.5.0","col1":"YSBzdHJpbmcgaW4gYmluYXJ5","col3":true,"col4":1,"col5":2,"col6":3,"col7":9223372036854775807,"col8":0.25,"col9":0.75,"col10":1234.23456,"col11":1.23456,"col12":"16436","col13":"2015-01-01 23:50:59.123","col14":[2,3,4],"col15":{"a string":2000},"col16":{"f1":4.75,"f2":[false,true]},"col17":[0.25,2.25,4.25]}""" :: Nil + // scalastyle:on + + // Generate data for the current version. + val df = spark.createDataFrame(spark.sparkContext.parallelize(data, 1), schema) + withTempPath { path => + df.write.format("json").mode("overwrite").save(path.getCanonicalPath) - // df.toJSON will convert internal rows to external rows first and then generate - // JSON objects. While, df.write.format("json") will write internal rows directly. - val allJSON = + // df.toJSON will convert internal rows to external rows first and then generate + // JSON objects. While, df.write.format("json") will write internal rows directly. + val allJSON = existingJSONData ++ df.toJSON.collect() ++ sparkContext.textFile(path.getCanonicalPath).collect() - Utils.deleteRecursively(path) - sparkContext.parallelize(allJSON, 1).saveAsTextFile(path.getCanonicalPath) - - // Read data back with the schema specified. 
- val col0Values = - Seq( - "Spark 1.2.2", - "Spark 1.3.1", - "Spark 1.3.1", - "Spark 1.4.1", - "Spark 1.4.1", - "Spark 1.5.0", - "Spark 1.5.0", - "Spark " + spark.sparkContext.version, - "Spark " + spark.sparkContext.version) - val expectedResult = col0Values.map { v => - Row.fromSeq(Seq(v) ++ constantValues) + Utils.deleteRecursively(path) + sparkContext.parallelize(allJSON, 1).saveAsTextFile(path.getCanonicalPath) + + // Read data back with the schema specified. + val col0Values = + Seq( + "Spark 1.2.2", + "Spark 1.3.1", + "Spark 1.3.1", + "Spark 1.4.1", + "Spark 1.4.1", + "Spark 1.5.0", + "Spark 1.5.0", + "Spark " + spark.sparkContext.version, + "Spark " + spark.sparkContext.version) + val expectedResult = col0Values.map { v => + Row.fromSeq(Seq(v) ++ constantValues) + } + checkAnswer( + spark.read.format("json").schema(schema).load(path.getCanonicalPath), + expectedResult + ) } - checkAnswer( - spark.read.format("json").schema(schema).load(path.getCanonicalPath), - expectedResult - ) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala index 6075f2c8877d..f0f62b608785 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.sources import java.io.File +import java.util.TimeZone import scala.util.Random @@ -125,56 +126,62 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes } else { Seq(false) } - for (dataType <- supportedDataTypes) { - for (parquetDictionaryEncodingEnabled <- parquetDictionaryEncodingEnabledConfs) { - val extraMessage = if (isParquetDataSource) { - s" with parquet.enable.dictionary = $parquetDictionaryEncodingEnabled" - } else { - "" - } - logInfo(s"Testing $dataType data type$extraMessage") - - val extraOptions = Map[String, String]( - "parquet.enable.dictionary" -> parquetDictionaryEncodingEnabled.toString - ) - - withTempPath { file => - val path = file.getCanonicalPath - - val dataGenerator = RandomDataGenerator.forType( - dataType = dataType, - nullable = true, - new Random(System.nanoTime()) - ).getOrElse { - fail(s"Failed to create data generator for schema $dataType") + // TODO: Support new parser too, see SPARK-26374. + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> "true") { + for (dataType <- supportedDataTypes) { + for (parquetDictionaryEncodingEnabled <- parquetDictionaryEncodingEnabledConfs) { + val extraMessage = if (isParquetDataSource) { + s" with parquet.enable.dictionary = $parquetDictionaryEncodingEnabled" + } else { + "" + } + logInfo(s"Testing $dataType data type$extraMessage") + + val extraOptions = Map[String, String]( + "parquet.enable.dictionary" -> parquetDictionaryEncodingEnabled.toString + ) + + withTempPath { file => + val path = file.getCanonicalPath + + val seed = System.nanoTime() + withClue(s"Random data generated with the seed: ${seed}") { + val dataGenerator = RandomDataGenerator.forType( + dataType = dataType, + nullable = true, + new Random(seed) + ).getOrElse { + fail(s"Failed to create data generator for schema $dataType") + } + + // Create a DF for the schema with random data. The index field is used to sort the + // DataFrame. This is a workaround for SPARK-10591. 
+ val schema = new StructType() + .add("index", IntegerType, nullable = false) + .add("col", dataType, nullable = true) + val rdd = + spark.sparkContext.parallelize((1 to 10).map(i => Row(i, dataGenerator()))) + val df = spark.createDataFrame(rdd, schema).orderBy("index").coalesce(1) + + df.write + .mode("overwrite") + .format(dataSourceName) + .option("dataSchema", df.schema.json) + .options(extraOptions) + .save(path) + + val loadedDF = spark + .read + .format(dataSourceName) + .option("dataSchema", df.schema.json) + .schema(df.schema) + .options(extraOptions) + .load(path) + .orderBy("index") + + checkAnswer(loadedDF, df) + } } - - // Create a DF for the schema with random data. The index field is used to sort the - // DataFrame. This is a workaround for SPARK-10591. - val schema = new StructType() - .add("index", IntegerType, nullable = false) - .add("col", dataType, nullable = true) - val rdd = - spark.sparkContext.parallelize((1 to 10).map(i => Row(i, dataGenerator()))) - val df = spark.createDataFrame(rdd, schema).orderBy("index").coalesce(1) - - df.write - .mode("overwrite") - .format(dataSourceName) - .option("dataSchema", df.schema.json) - .options(extraOptions) - .save(path) - - val loadedDF = spark - .read - .format(dataSourceName) - .option("dataSchema", df.schema.json) - .schema(df.schema) - .options(extraOptions) - .load(path) - .orderBy("index") - - checkAnswer(loadedDF, df) } } } From cd815ae6c5ce3edb8aec3add942549f76a20e586 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Sun, 16 Dec 2018 10:57:11 +0800 Subject: [PATCH 0142/1072] [SPARK-26078][SQL] Dedup self-join attributes on IN subqueries ## What changes were proposed in this pull request? When there is a self-join as result of a IN subquery, the join condition may be invalid, resulting in trivially true predicates and return wrong results. The PR deduplicates the subquery output in order to avoid the issue. ## How was this patch tested? added UT Closes #23057 from mgaido91/SPARK-26078. Authored-by: Marco Gaido Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/subquery.scala | 99 ++++++++++++------- .../org/apache/spark/sql/SubquerySuite.scala | 37 +++++++ 2 files changed, 98 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index e9b7a8b76e68..34840c6c977a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.mutable.ArrayBuffer -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.expressions.aggregate._ @@ -43,31 +43,53 @@ import org.apache.spark.sql.types._ * condition. */ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { - private def dedupJoin(joinPlan: LogicalPlan): LogicalPlan = joinPlan match { + + private def buildJoin( + outerPlan: LogicalPlan, + subplan: LogicalPlan, + joinType: JoinType, + condition: Option[Expression]): Join = { + // Deduplicate conflicting attributes if any. 
+ val dedupSubplan = dedupSubqueryOnSelfJoin(outerPlan, subplan, None, condition) + Join(outerPlan, dedupSubplan, joinType, condition) + } + + private def dedupSubqueryOnSelfJoin( + outerPlan: LogicalPlan, + subplan: LogicalPlan, + valuesOpt: Option[Seq[Expression]], + condition: Option[Expression] = None): LogicalPlan = { // SPARK-21835: It is possibly that the two sides of the join have conflicting attributes, // the produced join then becomes unresolved and break structural integrity. We should - // de-duplicate conflicting attributes. We don't use transformation here because we only - // care about the most top join converted from correlated predicate subquery. - case j @ Join(left, right, joinType @ (LeftSemi | LeftAnti | ExistenceJoin(_)), joinCond) => - val duplicates = right.outputSet.intersect(left.outputSet) - if (duplicates.nonEmpty) { - val aliasMap = AttributeMap(duplicates.map { dup => - dup -> Alias(dup, dup.toString)() - }.toSeq) - val aliasedExpressions = right.output.map { ref => - aliasMap.getOrElse(ref, ref) - } - val newRight = Project(aliasedExpressions, right) - val newJoinCond = joinCond.map { condExpr => - condExpr transform { - case a: Attribute => aliasMap.getOrElse(a, a).toAttribute + // de-duplicate conflicting attributes. + // SPARK-26078: it may also happen that the subquery has conflicting attributes with the outer + // values. In this case, the resulting join would contain trivially true conditions (eg. + // id#3 = id#3) which cannot be de-duplicated after. In this method, if there are conflicting + // attributes in the join condition, the subquery's conflicting attributes are changed using + // a projection which aliases them and resolves the problem. + val outerReferences = valuesOpt.map(values => + AttributeSet.fromAttributeSets(values.map(_.references))).getOrElse(AttributeSet.empty) + val outerRefs = outerPlan.outputSet ++ outerReferences + val duplicates = outerRefs.intersect(subplan.outputSet) + if (duplicates.nonEmpty) { + condition.foreach { e => + val conflictingAttrs = e.references.intersect(duplicates) + if (conflictingAttrs.nonEmpty) { + throw new AnalysisException("Found conflicting attributes " + + s"${conflictingAttrs.mkString(",")} in the condition joining outer plan:\n " + + s"$outerPlan\nand subplan:\n $subplan") } - } - Join(left, newRight, joinType, newJoinCond) - } else { - j } - case _ => joinPlan + val rewrites = AttributeMap(duplicates.map { dup => + dup -> Alias(dup, dup.toString)() + }.toSeq) + val aliasedExpressions = subplan.output.map { ref => + rewrites.getOrElse(ref, ref) + } + Project(aliasedExpressions, subplan) + } else { + subplan + } } def apply(plan: LogicalPlan): LogicalPlan = plan transform { @@ -85,17 +107,16 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { withSubquery.foldLeft(newFilter) { case (p, Exists(sub, conditions, _)) => val (joinCond, outerPlan) = rewriteExistentialExpr(conditions, p) - // Deduplicate conflicting attributes if any. - dedupJoin(Join(outerPlan, sub, LeftSemi, joinCond)) + buildJoin(outerPlan, sub, LeftSemi, joinCond) case (p, Not(Exists(sub, conditions, _))) => val (joinCond, outerPlan) = rewriteExistentialExpr(conditions, p) - // Deduplicate conflicting attributes if any. 
- dedupJoin(Join(outerPlan, sub, LeftAnti, joinCond)) + buildJoin(outerPlan, sub, LeftAnti, joinCond) case (p, InSubquery(values, ListQuery(sub, conditions, _, _))) => - val inConditions = values.zip(sub.output).map(EqualTo.tupled) - val (joinCond, outerPlan) = rewriteExistentialExpr(inConditions ++ conditions, p) // Deduplicate conflicting attributes if any. - dedupJoin(Join(outerPlan, sub, LeftSemi, joinCond)) + val newSub = dedupSubqueryOnSelfJoin(p, sub, Some(values)) + val inConditions = values.zip(newSub.output).map(EqualTo.tupled) + val (joinCond, outerPlan) = rewriteExistentialExpr(inConditions ++ conditions, p) + Join(outerPlan, newSub, LeftSemi, joinCond) case (p, Not(InSubquery(values, ListQuery(sub, conditions, _, _)))) => // This is a NULL-aware (left) anti join (NAAJ) e.g. col NOT IN expr // Construct the condition. A NULL in one of the conditions is regarded as a positive @@ -103,7 +124,10 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { // Note that will almost certainly be planned as a Broadcast Nested Loop join. // Use EXISTS if performance matters to you. - val inConditions = values.zip(sub.output).map(EqualTo.tupled) + + // Deduplicate conflicting attributes if any. + val newSub = dedupSubqueryOnSelfJoin(p, sub, Some(values)) + val inConditions = values.zip(newSub.output).map(EqualTo.tupled) val (joinCond, outerPlan) = rewriteExistentialExpr(inConditions, p) // Expand the NOT IN expression with the NULL-aware semantic // to its full form. That is from: @@ -118,8 +142,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { // will have the final conditions in the LEFT ANTI as // (A.A1 = B.B1 OR ISNULL(A.A1 = B.B1)) AND (B.B2 = A.A2) AND B.B3 > 1 val finalJoinCond = (nullAwareJoinConds ++ conditions).reduceLeft(And) - // Deduplicate conflicting attributes if any. - dedupJoin(Join(outerPlan, sub, LeftAnti, Option(finalJoinCond))) + Join(outerPlan, newSub, LeftAnti, Option(finalJoinCond)) case (p, predicate) => val (newCond, inputPlan) = rewriteExistentialExpr(Seq(predicate), p) Project(p.output, Filter(newCond.get, inputPlan)) @@ -140,16 +163,16 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { e transformUp { case Exists(sub, conditions, _) => val exists = AttributeReference("exists", BooleanType, nullable = false)() - // Deduplicate conflicting attributes if any. - newPlan = dedupJoin( - Join(newPlan, sub, ExistenceJoin(exists), conditions.reduceLeftOption(And))) + newPlan = + buildJoin(newPlan, sub, ExistenceJoin(exists), conditions.reduceLeftOption(And)) exists case InSubquery(values, ListQuery(sub, conditions, _, _)) => val exists = AttributeReference("exists", BooleanType, nullable = false)() - val inConditions = values.zip(sub.output).map(EqualTo.tupled) - val newConditions = (inConditions ++ conditions).reduceLeftOption(And) // Deduplicate conflicting attributes if any. 
- newPlan = dedupJoin(Join(newPlan, sub, ExistenceJoin(exists), newConditions)) + val newSub = dedupSubqueryOnSelfJoin(newPlan, sub, Some(values)) + val inConditions = values.zip(newSub.output).map(EqualTo.tupled) + val newConditions = (inConditions ++ conditions).reduceLeftOption(And) + newPlan = Join(newPlan, newSub, ExistenceJoin(exists), newConditions) exists } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 5088821ad736..c95c52f1d3a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -22,6 +22,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Sort} import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.types._ class SubquerySuite extends QueryTest with SharedSQLContext { import testImplicits._ @@ -1280,4 +1281,40 @@ class SubquerySuite extends QueryTest with SharedSQLContext { assert(subqueries.length == 1) } } + + test("SPARK-26078: deduplicate fake self joins for IN subqueries") { + withTempView("a", "b") { + Seq("a" -> 2, "b" -> 1).toDF("id", "num").createTempView("a") + Seq("a" -> 2, "b" -> 1).toDF("id", "num").createTempView("b") + + val df1 = spark.sql( + """ + |SELECT id,num,source FROM ( + | SELECT id, num, 'a' as source FROM a + | UNION ALL + | SELECT id, num, 'b' as source FROM b + |) AS c WHERE c.id IN (SELECT id FROM b WHERE num = 2) + """.stripMargin) + checkAnswer(df1, Seq(Row("a", 2, "a"), Row("a", 2, "b"))) + val df2 = spark.sql( + """ + |SELECT id,num,source FROM ( + | SELECT id, num, 'a' as source FROM a + | UNION ALL + | SELECT id, num, 'b' as source FROM b + |) AS c WHERE c.id NOT IN (SELECT id FROM b WHERE num = 2) + """.stripMargin) + checkAnswer(df2, Seq(Row("b", 1, "a"), Row("b", 1, "b"))) + val df3 = spark.sql( + """ + |SELECT id,num,source FROM ( + | SELECT id, num, 'a' as source FROM a + | UNION ALL + | SELECT id, num, 'b' as source FROM b + |) AS c WHERE c.id IN (SELECT id FROM b WHERE num = 2) OR + |c.id IN (SELECT id FROM b WHERE num = 3) + """.stripMargin) + checkAnswer(df3, Seq(Row("a", 2, "a"), Row("a", 2, "b"))) + } + } } From e3e33d8794da5f3597b8d706b734af5025360939 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Sun, 16 Dec 2018 11:02:00 +0800 Subject: [PATCH 0143/1072] [SPARK-26372][SQL] Don't reuse value from previous row when parsing bad CSV input field ## What changes were proposed in this pull request? CSV parsing accidentally uses the previous good value for a bad input field. See example in Jira. This PR ensures that the associated column is set to null when an input field cannot be converted. ## How was this patch tested? Added new test. Ran all SQL unit tests (testOnly org.apache.spark.sql.*). Ran pyspark tests for pyspark-sql Closes #23323 from bersprockets/csv-bad-field. 
Authored-by: Bruce Robbins Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/csv/UnivocityParser.scala | 1 + .../resources/test-data/bad_after_good.csv | 2 ++ .../execution/datasources/csv/CSVSuite.scala | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+) create mode 100644 sql/core/src/test/resources/test-data/bad_after_good.csv diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index ed089120055e..82a5b3c302b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -239,6 +239,7 @@ class UnivocityParser( } catch { case NonFatal(e) => badRecordException = badRecordException.orElse(Some(e)) + row.setNullAt(i) } i += 1 } diff --git a/sql/core/src/test/resources/test-data/bad_after_good.csv b/sql/core/src/test/resources/test-data/bad_after_good.csv new file mode 100644 index 000000000000..4621a7d23714 --- /dev/null +++ b/sql/core/src/test/resources/test-data/bad_after_good.csv @@ -0,0 +1,2 @@ +"good record",1999-08-01 +"bad record",1999-088-01 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 3b977d74053e..d9e5d7af1967 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -63,6 +63,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te private val datesFile = "test-data/dates.csv" private val unescapedQuotesFile = "test-data/unescaped-quotes.csv" private val valueMalformedFile = "test-data/value-malformed.csv" + private val badAfterGoodFile = "test-data/bad_after_good.csv" /** Verifies data and schema. */ private def verifyCars( @@ -2012,4 +2013,22 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te assert(!files.exists(_.getName.endsWith("csv"))) } } + + test("Do not reuse last good value for bad input field") { + val schema = StructType( + StructField("col1", StringType) :: + StructField("col2", DateType) :: + Nil + ) + val rows = spark.read + .schema(schema) + .format("csv") + .load(testFile(badAfterGoodFile)) + + val expectedRows = Seq( + Row("good record", java.sql.Date.valueOf("1999-08-01")), + Row("bad record", null)) + + checkAnswer(rows, expectedRows) + } } From 5217f7b2263c7aaeadf60ef602776bb3777269cd Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 17 Dec 2018 08:24:51 +0800 Subject: [PATCH 0144/1072] [SPARK-26248][SQL] Infer date type from CSV ## What changes were proposed in this pull request? The `CSVInferSchema` class is extended to support inferring of `DateType` from CSV input. The attempt to infer `DateType` is performed after inferring `TimestampType`. ## How was this patch tested? Added new test for inferring date types from CSV . It was also tested by existing suites like `CSVInferSchemaSuite`, `CsvExpressionsSuite`, `CsvFunctionsSuite` and `CsvSuite`. Closes #23202 from MaxGekk/csv-date-inferring. 
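As an end-to-end illustration of the new fallback (a sketch only; it assumes a SparkSession named `spark` with its implicits imported, and that the DataFrameReader path exercises the same inference logic the added unit test hits directly):

```scala
import spark.implicits._

// Two values that match the custom dateFormat but not any timestamp format.
val input = Seq("2018/12/02", "2018/12/03").toDS()

val df = spark.read
  .option("dateFormat", "yyyy/MM/dd")
  .option("inferSchema", "true")
  .csv(input)

// With this change the single column is inferred as DateType instead of StringType.
df.printSchema()
```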
Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/csv/CSVInferSchema.scala | 20 +++++++++++++++---- .../catalyst/csv/CSVInferSchemaSuite.scala | 18 +++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 35ade136cc60..11f3740d99a7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -22,16 +22,20 @@ import scala.util.control.Exception.allCatch import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.analysis.TypeCoercion import org.apache.spark.sql.catalyst.expressions.ExprUtils -import org.apache.spark.sql.catalyst.util.TimestampFormatter +import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter} import org.apache.spark.sql.types._ class CSVInferSchema(val options: CSVOptions) extends Serializable { @transient - private lazy val timestampParser = TimestampFormatter( + private lazy val timestampFormatter = TimestampFormatter( options.timestampFormat, options.timeZone, options.locale) + @transient + private lazy val dateFormatter = DateFormatter( + options.dateFormat, + options.locale) private val decimalParser = { ExprUtils.getDecimalParser(options.locale) @@ -104,6 +108,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType) case DoubleType => tryParseDouble(field) case TimestampType => tryParseTimestamp(field) + case DateType => tryParseDate(field) case BooleanType => tryParseBoolean(field) case StringType => StringType case other: DataType => @@ -159,9 +164,16 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { } private def tryParseTimestamp(field: String): DataType = { - // This case infers a custom `dataFormat` is set. 
- if ((allCatch opt timestampParser.parse(field)).isDefined) { + if ((allCatch opt timestampFormatter.parse(field)).isDefined) { TimestampType + } else { + tryParseDate(field) + } + } + + private def tryParseDate(field: String): DataType = { + if ((allCatch opt dateFormatter.parse(field)).isDefined) { + DateType } else { tryParseBoolean(field) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala index c2b525ad1a9f..84b2e616a442 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala @@ -187,4 +187,22 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper { Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalInfer(_, DecimalType(7, 0))) } + + test("inferring date type") { + var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd"), false, "GMT") + var inferSchema = new CSVInferSchema(options) + assert(inferSchema.inferField(NullType, "2018/12/02") == DateType) + + options = new CSVOptions(Map("dateFormat" -> "MMM yyyy"), false, "GMT") + inferSchema = new CSVInferSchema(options) + assert(inferSchema.inferField(NullType, "Dec 2018") == DateType) + + options = new CSVOptions( + Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss"), + columnPruning = false, + defaultTimeZoneId = "GMT") + inferSchema = new CSVInferSchema(options) + assert(inferSchema.inferField(NullType, "2018-12-03T11:00:00") == TimestampType) + assert(inferSchema.inferField(NullType, "2018-12-03") == DateType) + } } From e408e05322ac4e31de4d9bc58687c86882e3944a Mon Sep 17 00:00:00 2001 From: Keiji Yoshida Date: Sun, 16 Dec 2018 17:11:58 -0800 Subject: [PATCH 0145/1072] [MINOR][DOCS] Fix the "not found: value Row" error on the "programmatic_schema" example ## What changes were proposed in this pull request? Print `import org.apache.spark.sql.Row` of `SparkSQLExample.scala` on the `programmatic_schema` example to fix the `not found: value Row` error on it. ``` scala> val rowRDD = peopleRDD.map(_.split(",")).map(attributes => Row(attributes(0), attributes(1).trim)) :28: error: not found: value Row val rowRDD = peopleRDD.map(_.split(",")).map(attributes => Row(attributes(0), attributes(1).trim)) ``` ## How was this patch tested? NA Closes #23326 from kjmrknsn/fix-sql-getting-started. 
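With the import printed, the rendered `programmatic_schema` snippet reads as below (a sketch of the doc example; `peopleRDD` is the RDD[String] of "name, age" lines defined earlier in that example):

```scala
import org.apache.spark.sql.Row

// Each line is split into fields and wrapped in a Row, which now resolves.
val rowRDD = peopleRDD
  .map(_.split(","))
  .map(attributes => Row(attributes(0), attributes(1).trim))
```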
Authored-by: Keiji Yoshida Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/examples/sql/SparkSQLExample.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala index 958361a6684c..678cbc64aff1 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala @@ -16,7 +16,9 @@ */ package org.apache.spark.examples.sql +// $example on:programmatic_schema$ import org.apache.spark.sql.Row +// $example off:programmatic_schema$ // $example on:init_session$ import org.apache.spark.sql.SparkSession // $example off:init_session$ From db1c5b1839598eada81e4709ab4d25e799bb1810 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 17 Dec 2018 11:53:14 +0800 Subject: [PATCH 0146/1072] Revert "[SPARK-26248][SQL] Infer date type from CSV" This reverts commit 5217f7b2263c7aaeadf60ef602776bb3777269cd. --- .../sql/catalyst/csv/CSVInferSchema.scala | 20 ++++--------------- .../catalyst/csv/CSVInferSchemaSuite.scala | 18 ----------------- 2 files changed, 4 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 11f3740d99a7..35ade136cc60 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -22,20 +22,16 @@ import scala.util.control.Exception.allCatch import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.analysis.TypeCoercion import org.apache.spark.sql.catalyst.expressions.ExprUtils -import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.TimestampFormatter import org.apache.spark.sql.types._ class CSVInferSchema(val options: CSVOptions) extends Serializable { @transient - private lazy val timestampFormatter = TimestampFormatter( + private lazy val timestampParser = TimestampFormatter( options.timestampFormat, options.timeZone, options.locale) - @transient - private lazy val dateFormatter = DateFormatter( - options.dateFormat, - options.locale) private val decimalParser = { ExprUtils.getDecimalParser(options.locale) @@ -108,7 +104,6 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType) case DoubleType => tryParseDouble(field) case TimestampType => tryParseTimestamp(field) - case DateType => tryParseDate(field) case BooleanType => tryParseBoolean(field) case StringType => StringType case other: DataType => @@ -164,16 +159,9 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { } private def tryParseTimestamp(field: String): DataType = { - if ((allCatch opt timestampFormatter.parse(field)).isDefined) { + // This case infers a custom `dataFormat` is set. 
+ if ((allCatch opt timestampParser.parse(field)).isDefined) { TimestampType - } else { - tryParseDate(field) - } - } - - private def tryParseDate(field: String): DataType = { - if ((allCatch opt dateFormatter.parse(field)).isDefined) { - DateType } else { tryParseBoolean(field) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala index 84b2e616a442..c2b525ad1a9f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala @@ -187,22 +187,4 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper { Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalInfer(_, DecimalType(7, 0))) } - - test("inferring date type") { - var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd"), false, "GMT") - var inferSchema = new CSVInferSchema(options) - assert(inferSchema.inferField(NullType, "2018/12/02") == DateType) - - options = new CSVOptions(Map("dateFormat" -> "MMM yyyy"), false, "GMT") - inferSchema = new CSVInferSchema(options) - assert(inferSchema.inferField(NullType, "Dec 2018") == DateType) - - options = new CSVOptions( - Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss"), - columnPruning = false, - defaultTimeZoneId = "GMT") - inferSchema = new CSVInferSchema(options) - assert(inferSchema.inferField(NullType, "2018-12-03T11:00:00") == TimestampType) - assert(inferSchema.inferField(NullType, "2018-12-03") == DateType) - } } From 56448c662398f4c5319a337e6601450270a6a27c Mon Sep 17 00:00:00 2001 From: Kris Mok Date: Mon, 17 Dec 2018 13:41:20 +0800 Subject: [PATCH 0147/1072] [SPARK-26352][SQL] join reorder should not change the order of output attributes ## What changes were proposed in this pull request? The optimizer rule `org.apache.spark.sql.catalyst.optimizer.ReorderJoin` performs join reordering on inner joins. This was introduced from SPARK-12032 (https://github.com/apache/spark/pull/10073) in 2015-12. After it had reordered the joins, though, it didn't check whether or not the output attribute order is still the same as before. Thus, it's possible to have a mismatch between the reordered output attributes order vs the schema that a DataFrame thinks it has. The same problem exists in the CBO version of join reordering (`CostBasedJoinReorder`) too. 
This can be demonstrated with the example: ```scala spark.sql("create table table_a (x int, y int) using parquet") spark.sql("create table table_b (i int, j int) using parquet") spark.sql("create table table_c (a int, b int) using parquet") val df = spark.sql(""" with df1 as (select * from table_a cross join table_b) select * from df1 join table_c on a = x and b = i """) ``` here's what the DataFrame thinks: ``` scala> df.printSchema root |-- x: integer (nullable = true) |-- y: integer (nullable = true) |-- i: integer (nullable = true) |-- j: integer (nullable = true) |-- a: integer (nullable = true) |-- b: integer (nullable = true) ``` here's what the optimized plan thinks, after join reordering: ``` scala> df.queryExecution.optimizedPlan.output.foreach(a => println(s"|-- ${a.name}: ${a.dataType.typeName}")) |-- x: integer |-- y: integer |-- a: integer |-- b: integer |-- i: integer |-- j: integer ``` If we exclude the `ReorderJoin` rule (using Spark 2.4's optimizer rule exclusion feature), it's back to normal: ``` scala> spark.conf.set("spark.sql.optimizer.excludedRules", "org.apache.spark.sql.catalyst.optimizer.ReorderJoin") scala> val df = spark.sql("with df1 as (select * from table_a cross join table_b) select * from df1 join table_c on a = x and b = i") df: org.apache.spark.sql.DataFrame = [x: int, y: int ... 4 more fields] scala> df.queryExecution.optimizedPlan.output.foreach(a => println(s"|-- ${a.name}: ${a.dataType.typeName}")) |-- x: integer |-- y: integer |-- i: integer |-- j: integer |-- a: integer |-- b: integer ``` Note that this output attribute ordering problem leads to data corruption, and can manifest itself in various symptoms: * Silently corrupting data, if the reordered columns happen to either have matching types or have sufficiently-compatible types (e.g. all fixed length primitive types are considered as "sufficiently compatible" in an `UnsafeRow`), then only the resulting data is going to be wrong but it might not trigger any alarms immediately. Or * Weird Java-level exceptions like `java.lang.NegativeArraySizeException`, or even SIGSEGVs. ## How was this patch tested? Added new unit test in `JoinReorderSuite` and new end-to-end test in `JoinSuite`. Also made `JoinReorderSuite` and `StarJoinReorderSuite` assert more strongly on maintaining output attribute order. Closes #23303 from rednaxelafx/fix-join-reorder. 
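A quick way to confirm the fix on the example above is to compare the attribute order of the analyzed and optimized plans (sketch; `df` is the DataFrame built in the example):

```scala
// After this patch the reordered join is wrapped in a Project that restores the
// original attribute order, so both plans expose the columns identically.
val analyzedNames  = df.queryExecution.analyzed.output.map(_.name)
val optimizedNames = df.queryExecution.optimizedPlan.output.map(_.name)
assert(analyzedNames == optimizedNames)
```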
Authored-by: Kris Mok Signed-off-by: Wenchen Fan --- .../optimizer/CostBasedJoinReorder.scala | 10 +++++ .../spark/sql/catalyst/optimizer/joins.scala | 12 +++++- .../optimizer/JoinOptimizationSuite.scala | 3 ++ .../catalyst/optimizer/JoinReorderSuite.scala | 38 +++++++++++++++++-- .../StarJoinCostBasedReorderSuite.scala | 21 +++++++++- .../optimizer/StarJoinReorderSuite.scala | 28 ++++++++++++-- .../org/apache/spark/sql/JoinSuite.scala | 14 +++++++ 7 files changed, 116 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index 064ca68b7a62..01634a9d852c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -48,6 +48,7 @@ object CostBasedJoinReorder extends Rule[LogicalPlan] with PredicateHelper { if projectList.forall(_.isInstanceOf[Attribute]) => reorder(p, p.output) } + // After reordering is finished, convert OrderedJoin back to Join result transformDown { case OrderedJoin(left, right, jt, cond) => Join(left, right, jt, cond) @@ -175,11 +176,20 @@ object JoinReorderDP extends PredicateHelper with Logging { assert(topOutputSet == p.outputSet) // Keep the same order of final output attributes. p.copy(projectList = output) + case finalPlan if !sameOutput(finalPlan, output) => + Project(output, finalPlan) case finalPlan => finalPlan } } + private def sameOutput(plan: LogicalPlan, expectedOutput: Seq[Attribute]): Boolean = { + val thisOutput = plan.output + thisOutput.length == expectedOutput.length && thisOutput.zip(expectedOutput).forall { + case (a1, a2) => a1.semanticEquals(a2) + } + } + /** Find all possible plans at the next level, based on existing levels. */ private def searchLevel( existingLevels: Seq[JoinPlanMap], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala index 6ebb194d71c2..0b6471289a47 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala @@ -86,9 +86,9 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper { } def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case ExtractFiltersAndInnerJoins(input, conditions) + case p @ ExtractFiltersAndInnerJoins(input, conditions) if input.size > 2 && conditions.nonEmpty => - if (SQLConf.get.starSchemaDetection && !SQLConf.get.cboEnabled) { + val reordered = if (SQLConf.get.starSchemaDetection && !SQLConf.get.cboEnabled) { val starJoinPlan = StarSchemaDetection.reorderStarJoins(input, conditions) if (starJoinPlan.nonEmpty) { val rest = input.filterNot(starJoinPlan.contains(_)) @@ -99,6 +99,14 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper { } else { createOrderedJoin(input, conditions) } + + if (p.sameOutput(reordered)) { + reordered + } else { + // Reordering the joins have changed the order of the columns. + // Inject a projection to make sure we restore to the expected ordering. 
+ Project(p.output, reordered) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala index ccd9d8dd4d21..e9438b2eee55 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala @@ -102,16 +102,19 @@ class JoinOptimizationSuite extends PlanTest { x.join(y).join(z).where(("x.b".attr === "z.b".attr) && ("y.d".attr === "z.a".attr)), x.join(z, condition = Some("x.b".attr === "z.b".attr)) .join(y, condition = Some("y.d".attr === "z.a".attr)) + .select(Seq("x.a", "x.b", "x.c", "y.d", "z.a", "z.b", "z.c").map(_.attr): _*) ), ( x.join(y, Cross).join(z, Cross) .where(("x.b".attr === "z.b".attr) && ("y.d".attr === "z.a".attr)), x.join(z, Cross, Some("x.b".attr === "z.b".attr)) .join(y, Cross, Some("y.d".attr === "z.a".attr)) + .select(Seq("x.a", "x.b", "x.c", "y.d", "z.a", "z.b", "z.c").map(_.attr): _*) ), ( x.join(y, Inner).join(z, Cross).where("x.b".attr === "z.a".attr), x.join(z, Cross, Some("x.b".attr === "z.a".attr)).join(y, Inner) + .select(Seq("x.a", "x.b", "x.c", "y.d", "z.a", "z.b", "z.c").map(_.attr): _*) ) ) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala index 565b0a10154a..c94a8b9e318f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala @@ -20,8 +20,8 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap} -import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.{Cross, Inner, PlanTest} +import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.statsEstimation.{StatsEstimationTestBase, StatsTestPlan} import org.apache.spark.sql.internal.SQLConf.{CBO_ENABLED, JOIN_REORDER_ENABLED} @@ -124,7 +124,8 @@ class JoinReorderSuite extends PlanTest with StatsEstimationTestBase { // the original order (t1 J t2) J t3. 
val bestPlan = t1.join(t3, Inner, Some(nameToAttr("t1.v-1-10") === nameToAttr("t3.v-1-100"))) - .join(t2, Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5"))) + .join(t2, Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5"))) + .select(outputsOf(t1, t2, t3): _*) assertEqualPlans(originalPlan, bestPlan) } @@ -139,7 +140,9 @@ class JoinReorderSuite extends PlanTest with StatsEstimationTestBase { val bestPlan = t1.join(t3, Inner, Some(nameToAttr("t1.v-1-10") === nameToAttr("t3.v-1-100"))) .join(t2, Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5"))) + .select(outputsOf(t1, t2, t3): _*) // this is redundant but we'll take it for now .join(t4) + .select(outputsOf(t1, t2, t4, t3): _*) assertEqualPlans(originalPlan, bestPlan) } @@ -202,6 +205,7 @@ class JoinReorderSuite extends PlanTest with StatsEstimationTestBase { t1.join(t2, Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5"))) .join(t4.join(t3, Inner, Some(nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))), Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2"))) + .select(outputsOf(t1, t4, t2, t3): _*) assertEqualPlans(originalPlan, bestPlan) } @@ -219,6 +223,23 @@ class JoinReorderSuite extends PlanTest with StatsEstimationTestBase { } } + test("SPARK-26352: join reordering should not change the order of attributes") { + // This test case does not rely on CBO. + // It's similar to the test case above, but catches a reordering bug that the one above doesn't + val tab1 = LocalRelation('x.int, 'y.int) + val tab2 = LocalRelation('i.int, 'j.int) + val tab3 = LocalRelation('a.int, 'b.int) + val original = + tab1.join(tab2, Cross) + .join(tab3, Inner, Some('a === 'x && 'b === 'i)) + val expected = + tab1.join(tab3, Inner, Some('a === 'x)) + .join(tab2, Cross, Some('b === 'i)) + .select(outputsOf(tab1, tab2, tab3): _*) + + assertEqualPlans(original, expected) + } + test("reorder recursively") { // Original order: // Join @@ -266,8 +287,17 @@ class JoinReorderSuite extends PlanTest with StatsEstimationTestBase { private def assertEqualPlans( originalPlan: LogicalPlan, groundTruthBestPlan: LogicalPlan): Unit = { - val optimized = Optimize.execute(originalPlan.analyze) + val analyzed = originalPlan.analyze + val optimized = Optimize.execute(analyzed) val expected = groundTruthBestPlan.analyze + + assert(analyzed.sameOutput(expected)) // if this fails, the expected plan itself is incorrect + assert(analyzed.sameOutput(optimized)) + compareJoinOrder(optimized, expected) } + + private def outputsOf(plans: LogicalPlan*): Seq[Attribute] = { + plans.map(_.output).reduce(_ ++ _) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala index d4d23ad69b2c..baae934e1e4f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala @@ -218,6 +218,7 @@ class StarJoinCostBasedReorderSuite extends PlanTest with StatsEstimationTestBas .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk"))) .join(t2, Inner, Some(nameToAttr("f1_c2") === nameToAttr("t2_c1"))) .join(t1, Inner, Some(nameToAttr("f1_c1") === nameToAttr("t1_c1"))) + .select(outputsOf(f1, t1, t2, d1, d2): _*) assertEqualPlans(query, expected) } @@ -256,6 +257,7 @@ class StarJoinCostBasedReorderSuite extends 
PlanTest with StatsEstimationTestBas .join(t3.join(t2, Inner, Some(nameToAttr("t2_c2") === nameToAttr("t3_c1"))), Inner, Some(nameToAttr("d1_c2") === nameToAttr("t2_c1"))) .join(t1, Inner, Some(nameToAttr("t1_c1") === nameToAttr("f1_c1"))) + .select(outputsOf(d1, t1, t2, f1, d2, t3): _*) assertEqualPlans(query, expected) } @@ -297,6 +299,7 @@ class StarJoinCostBasedReorderSuite extends PlanTest with StatsEstimationTestBas Some(nameToAttr("t3_c1") === nameToAttr("t4_c1"))) .join(t1.join(t2, Inner, Some(nameToAttr("t1_c1") === nameToAttr("t2_c1"))), Inner, Some(nameToAttr("t1_c2") === nameToAttr("t4_c2"))) + .select(outputsOf(d1, t1, t2, t3, t4, f1, d2): _*) assertEqualPlans(query, expected) } @@ -347,6 +350,7 @@ class StarJoinCostBasedReorderSuite extends PlanTest with StatsEstimationTestBas Some(nameToAttr("d3_c2") === nameToAttr("t1_c1"))) .join(t5.join(t6, Inner, Some(nameToAttr("t5_c2") === nameToAttr("t6_c2"))), Inner, Some(nameToAttr("d2_c2") === nameToAttr("t5_c1"))) + .select(outputsOf(d1, t3, t4, f1, d2, t5, t6, d3, t1, t2): _*) assertEqualPlans(query, expected) } @@ -375,6 +379,7 @@ class StarJoinCostBasedReorderSuite extends PlanTest with StatsEstimationTestBas f1.join(d3, Inner, Some(nameToAttr("f1_fk3") === nameToAttr("d3_pk"))) .join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk"))) .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk"))) + .select(outputsOf(d1, d2, f1, d3): _*) assertEqualPlans(query, expected) } @@ -400,13 +405,27 @@ class StarJoinCostBasedReorderSuite extends PlanTest with StatsEstimationTestBas f1.join(t3, Inner, Some(nameToAttr("f1_fk3") === nameToAttr("t3_c1"))) .join(t2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("t2_c1"))) .join(t1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("t1_c1"))) + .select(outputsOf(t1, f1, t2, t3): _*) assertEqualPlans(query, expected) } private def assertEqualPlans( plan1: LogicalPlan, plan2: LogicalPlan): Unit = { - val optimized = Optimize.execute(plan1.analyze) + val analyzed = plan1.analyze + val optimized = Optimize.execute(analyzed) val expected = plan2.analyze + + assert(equivalentOutput(analyzed, expected)) // if this fails, the expected itself is incorrect + assert(equivalentOutput(analyzed, optimized)) + compareJoinOrder(optimized, expected) } + + private def outputsOf(plans: LogicalPlan*): Seq[Attribute] = { + plans.map(_.output).reduce(_ ++ _) + } + + private def equivalentOutput(plan1: LogicalPlan, plan2: LogicalPlan): Boolean = { + normalizeExprIds(plan1).output == normalizeExprIds(plan2).output + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinReorderSuite.scala index 4e0883e91e84..9dc653b9d6c4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinReorderSuite.scala @@ -182,6 +182,7 @@ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk1"))) .join(d3, Inner, Some(nameToAttr("f1_fk3") === nameToAttr("d3_pk1"))) .join(s3, Inner, Some(nameToAttr("d3_fk1") === nameToAttr("s3_pk1"))) + .select(outputsOf(d1, d2, f1, d3, s3): _*) assertEqualPlans(query, expected) } @@ -220,6 +221,7 @@ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { .join(d3, Inner, Some(nameToAttr("f1_fk3") === nameToAttr("d3_pk1"))) 
.join(d2, Inner, Some(nameToAttr("f1_fk2") < nameToAttr("d2_pk1"))) .join(s3, Inner, Some(nameToAttr("d3_fk1") === nameToAttr("s3_pk1"))) + .select(outputsOf(d1, f1, d2, s3, d3): _*) assertEqualPlans(query, expected) } @@ -255,7 +257,7 @@ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { .join(d3, Inner, Some(nameToAttr("d3_fk1") === nameToAttr("s3_pk1"))) .join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk1"))) .join(s3, Inner, Some(nameToAttr("f1_fk3") === nameToAttr("s3_c2"))) - + .select(outputsOf(d1, f1, d2, s3, d3): _*) assertEqualPlans(query, expected) } @@ -292,6 +294,7 @@ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { .join(d3, Inner, Some(nameToAttr("f1_fk3") === nameToAttr("d3_pk1"))) .join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_c2"))) .join(s3, Inner, Some(nameToAttr("d3_fk1") < nameToAttr("s3_pk1"))) + .select(outputsOf(d1, f1, d2, s3, d3): _*) assertEqualPlans(query, expected) } @@ -395,6 +398,7 @@ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { .join(d2.where(nameToAttr("d2_c2") === 2), Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk1"))) .join(s3, Inner, Some(nameToAttr("f11_fk1") === nameToAttr("s3_pk1"))) + .select(outputsOf(d1, f11, f1, d2, s3): _*) assertEqualPlans(query, equivQuery) } @@ -430,6 +434,7 @@ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { .join(d2.where(nameToAttr("d2_c2") === 2), Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_c4"))) .join(s3, Inner, Some(nameToAttr("d3_fk1") === nameToAttr("s3_pk1"))) + .select(outputsOf(d1, d3, f1, d2, s3): _*) assertEqualPlans(query, expected) } @@ -465,6 +470,7 @@ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { .join(d2.where(nameToAttr("d2_c2") === 2), Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk1"))) .join(s3, Inner, Some(nameToAttr("d3_fk1") === nameToAttr("s3_pk1"))) + .select(outputsOf(d1, d3, f1, d2, s3): _*) assertEqualPlans(query, expected) } @@ -499,6 +505,7 @@ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { .join(d2.where(nameToAttr("d2_c2") === 2), Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk1"))) .join(s3, Inner, Some(nameToAttr("d3_fk1") === nameToAttr("s3_pk1"))) + .select(outputsOf(d1, d3, f1, d2, s3): _*) assertEqualPlans(query, expected) } @@ -532,6 +539,7 @@ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { .join(d3, Inner, Some(nameToAttr("f1_fk3") < nameToAttr("d3_pk1"))) .join(d2, Inner, Some(nameToAttr("f1_fk2") < nameToAttr("d2_pk1"))) .join(s3, Inner, Some(nameToAttr("d3_fk1") < nameToAttr("s3_pk1"))) + .select(outputsOf(d1, d3, f1, d2, s3): _*) assertEqualPlans(query, expected) } @@ -565,13 +573,27 @@ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { .join(d3, Inner, Some(nameToAttr("f1_fk3") === nameToAttr("d3_pk1"))) .join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk1"))) .join(s3, Inner, Some(nameToAttr("d3_fk1") === nameToAttr("s3_pk1"))) + .select(outputsOf(d1, d3, f1, d2, s3): _*) assertEqualPlans(query, expected) } - private def assertEqualPlans( plan1: LogicalPlan, plan2: LogicalPlan): Unit = { - val optimized = Optimize.execute(plan1.analyze) + private def assertEqualPlans(plan1: LogicalPlan, plan2: LogicalPlan): Unit = { + val analyzed = plan1.analyze + val optimized = Optimize.execute(analyzed) val expected = plan2.analyze + + assert(equivalentOutput(analyzed, expected)) // if this fails, the expected 
itself is incorrect + assert(equivalentOutput(analyzed, optimized)) + compareJoinOrder(optimized, expected) } + + private def outputsOf(plans: LogicalPlan*): Seq[Attribute] = { + plans.map(_.output).reduce(_ ++ _) + } + + private def equivalentOutput(plan1: LogicalPlan, plan2: LogicalPlan): Boolean = { + normalizeExprIds(plan1).output == normalizeExprIds(plan2).output + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index aa2162c9d2cd..91445c8d96d8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -895,4 +895,18 @@ class JoinSuite extends QueryTest with SharedSQLContext { checkAnswer(res, Row(0, 0, 0)) } } + + test("SPARK-26352: join reordering should not change the order of columns") { + withTable("tab1", "tab2", "tab3") { + spark.sql("select 1 as x, 100 as y").write.saveAsTable("tab1") + spark.sql("select 42 as i, 200 as j").write.saveAsTable("tab2") + spark.sql("select 1 as a, 42 as b").write.saveAsTable("tab3") + + val df = spark.sql(""" + with tmp as (select * from tab1 cross join tab2) + select * from tmp join tab3 on a = x and b = i + """) + checkAnswer(df, Row(1, 100, 42, 200, 1, 42)) + } + } } From 5960a8297ca06a4c62f39a8821dba4ba172f2bfc Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 16 Dec 2018 23:40:06 -0800 Subject: [PATCH 0148/1072] [SPARK-26327][SQL][FOLLOW-UP] Refactor the code and restore the metrics name ## What changes were proposed in this pull request? - The original comment about `updateDriverMetrics` is not right. - Refactor the code to ensure `selectedPartitions ` has been set before sending the driver-side metrics. - Restore the original name, which is more general and extendable. ## How was this patch tested? The existing tests. Closes #23328 from gatorsmile/followupSpark-26142. Authored-by: gatorsmile Signed-off-by: gatorsmile --- .../sql/execution/DataSourceScanExec.scala | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index c0fa4e777b49..322ffffca564 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.{ArrayBuffer, HashMap} import org.apache.commons.lang3.StringUtils import org.apache.hadoop.fs.{BlockLocation, FileStatus, LocatedFileStatus, Path} @@ -167,14 +167,26 @@ case class FileSourceScanExec( partitionSchema = relation.partitionSchema, relation.sparkSession.sessionState.conf) - private var fileListingTime = 0L + val driverMetrics: HashMap[String, Long] = HashMap.empty + + /** + * Send the driver-side metrics. Before calling this function, selectedPartitions has + * been initialized. See SPARK-26327 for more details. 
+ */ + private def sendDriverMetrics(): Unit = { + driverMetrics.foreach(e => metrics(e._1).add(e._2)) + val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, + metrics.filter(e => driverMetrics.contains(e._1)).values.toSeq) + } @transient private lazy val selectedPartitions: Seq[PartitionDirectory] = { val optimizerMetadataTimeNs = relation.location.metadataOpsTimeNs.getOrElse(0L) val startTime = System.nanoTime() val ret = relation.location.listFiles(partitionFilters, dataFilters) + driverMetrics("numFiles") = ret.map(_.files.size.toLong).sum val timeTakenMs = ((System.nanoTime() - startTime) + optimizerMetadataTimeNs) / 1000 / 1000 - fileListingTime = timeTakenMs + driverMetrics("metadataTime") = timeTakenMs ret } @@ -286,8 +298,6 @@ case class FileSourceScanExec( } private lazy val inputRDD: RDD[InternalRow] = { - // Update metrics for taking effect in both code generation node and normal node. - updateDriverMetrics() val readFile: (PartitionedFile) => Iterator[InternalRow] = relation.fileFormat.buildReaderWithPartitionValues( sparkSession = relation.sparkSession, @@ -298,12 +308,14 @@ case class FileSourceScanExec( options = relation.options, hadoopConf = relation.sparkSession.sessionState.newHadoopConfWithOptions(relation.options)) - relation.bucketSpec match { + val readRDD = relation.bucketSpec match { case Some(bucketing) if relation.sparkSession.sessionState.conf.bucketingEnabled => createBucketedReadRDD(bucketing, readFile, selectedPartitions, relation) case _ => createNonBucketedReadRDD(readFile, selectedPartitions, relation) } + sendDriverMetrics() + readRDD } override def inputRDDs(): Seq[RDD[InternalRow]] = { @@ -313,7 +325,7 @@ case class FileSourceScanExec( override lazy val metrics = Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), "numFiles" -> SQLMetrics.createMetric(sparkContext, "number of files"), - "fileListingTime" -> SQLMetrics.createMetric(sparkContext, "file listing time (ms)"), + "metadataTime" -> SQLMetrics.createMetric(sparkContext, "metadata time"), "scanTime" -> SQLMetrics.createTimingMetric(sparkContext, "scan time")) protected override def doExecute(): RDD[InternalRow] = { @@ -504,19 +516,6 @@ case class FileSourceScanExec( } } - /** - * Send the updated metrics to driver, while this function calling, selectedPartitions has - * been initialized. See SPARK-26327 for more detail. - */ - private def updateDriverMetrics() = { - metrics("numFiles").add(selectedPartitions.map(_.files.size.toLong).sum) - metrics("fileListingTime").add(fileListingTime) - - val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) - SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, - metrics("numFiles") :: metrics("fileListingTime") :: Nil) - } - override def doCanonicalize(): FileSourceScanExec = { FileSourceScanExec( relation, From f6888f7c944daff3d7c88b37e883673866eb148e Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 17 Dec 2018 00:13:51 -0800 Subject: [PATCH 0149/1072] [SPARK-20636] Add the rule TransposeWindow to the optimization batch ## What changes were proposed in this pull request? This PR is a follow-up of the PR https://github.com/apache/spark/pull/17899. It is to add the rule TransposeWindow the optimizer batch. ## How was this patch tested? The existing tests. Closes #23222 from gatorsmile/followupSPARK-20636. 
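To make the effect of the rule concrete, the shape of query it targets mirrors the updated test below; a standalone sketch of that shape, where the session setup and data are illustrative only:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

val spark = SparkSession.builder().master("local[*]").appName("transpose-window-sketch").getOrCreate()
import spark.implicits._

val df = Seq(("S1", "P1", 100), ("S1", "P1", 700), ("S2", "P1", 200), ("S2", "P2", 300))
  .toDF("sno", "pno", "qty")

// Two adjacent window specs where one partitioning ("sno") is a subset of the
// other ("sno", "pno"). With TransposeWindow in the optimizer batch the two
// Window operators can be reordered so that a single shuffle serves both,
// i.e. the physical plan contains one Exchange instead of two.
val w1 = Window.partitionBy("sno")
val w2 = Window.partitionBy("sno", "pno")

val result = df
  .select($"sno", $"pno", $"qty", sum($"qty").over(w2).alias("sum_qty_2"))
  .select($"sno", $"pno", $"qty", $"sum_qty_2", sum($"qty").over(w1).alias("sum_qty_1"))

result.explain()  // count the Exchange nodes in the printed physical plan
```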
Authored-by: gatorsmile Signed-off-by: gatorsmile --- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../sql/DataFrameWindowFunctionsSuite.scala | 38 +++++++++++++------ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f615757a837a..3eb6bca6ec97 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -73,6 +73,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) CombineLimits, CombineUnions, // Constant folding and strength reduction + TransposeWindow, NullPropagation, ConstantPropagation, FoldablePropagation, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 9a5d5a9966ab..9277dc685924 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql import org.scalatest.Matchers.the import org.apache.spark.TestUtils.{assertNotSpilled, assertSpilled} +import org.apache.spark.sql.catalyst.optimizer.TransposeWindow +import org.apache.spark.sql.execution.exchange.Exchange import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction, Window} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -668,18 +670,30 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext { ("S2", "P2", 300) ).toDF("sno", "pno", "qty") - val w1 = Window.partitionBy("sno") - val w2 = Window.partitionBy("sno", "pno") - - checkAnswer( - df.select($"sno", $"pno", $"qty", sum($"qty").over(w2).alias("sum_qty_2")) - .select($"sno", $"pno", $"qty", col("sum_qty_2"), sum("qty").over(w1).alias("sum_qty_1")), - Seq( - Row("S1", "P1", 100, 800, 800), - Row("S1", "P1", 700, 800, 800), - Row("S2", "P1", 200, 200, 500), - Row("S2", "P2", 300, 300, 500))) - + Seq(true, false).foreach { transposeWindowEnabled => + val excludedRules = if (transposeWindowEnabled) "" else TransposeWindow.ruleName + withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> excludedRules) { + val w1 = Window.partitionBy("sno") + val w2 = Window.partitionBy("sno", "pno") + + val select = df.select($"sno", $"pno", $"qty", sum($"qty").over(w2).alias("sum_qty_2")) + .select($"sno", $"pno", $"qty", col("sum_qty_2"), sum("qty").over(w1).alias("sum_qty_1")) + + val expectedNumExchanges = if (transposeWindowEnabled) 1 else 2 + val actualNumExchanges = select.queryExecution.executedPlan.collect { + case e: Exchange => e + }.length + assert(actualNumExchanges == expectedNumExchanges) + + checkAnswer( + select, + Seq( + Row("S1", "P1", 100, 800, 800), + Row("S1", "P1", 700, 800, 800), + Row("S2", "P1", 200, 200, 500), + Row("S2", "P2", 300, 300, 500))) + } + } } test("NaN and -0.0 in window partition keys") { From 12640d674b0af6716023fad30fe12cee728bfe34 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 17 Dec 2018 21:47:38 +0800 Subject: [PATCH 0150/1072] [SPARK-26243][SQL][FOLLOWUP] fix code style issues in TimestampFormatter.scala ## What changes were proposed in this pull request? 1. 
rename `FormatterUtils` to `DateTimeFormatterHelper`, and move it to a separated file 2. move `DateFormatter` and its implementation to a separated file 3. mark some methods as private 4. add `override` to some methods ## How was this patch tested? existing tests Closes #23329 from cloud-fan/minor. Authored-by: Wenchen Fan Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/util/DateFormatter.scala | 96 +++++++++++++++ .../util/DateTimeFormatterHelper.scala | 44 +++++++ .../catalyst/util/TimestampFormatter.scala | 115 ++---------------- .../spark/sql/util/DateFormatterSuite.scala | 92 ++++++++++++++ ...te.scala => TimestampFormatterSuite.scala} | 73 +---------- 5 files changed, 246 insertions(+), 174 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala rename sql/catalyst/src/test/scala/org/apache/spark/sql/util/{DateTimestampFormatterSuite.scala => TimestampFormatterSuite.scala} (66%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala new file mode 100644 index 000000000000..9e8d51cc65f0 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.util + +import java.time.{Instant, ZoneId} +import java.util.Locale + +import scala.util.Try + +import org.apache.commons.lang3.time.FastDateFormat + +import org.apache.spark.sql.internal.SQLConf + +sealed trait DateFormatter { + def parse(s: String): Int // returns days since epoch + def format(days: Int): String +} + +class Iso8601DateFormatter( + pattern: String, + locale: Locale) extends DateFormatter with DateTimeFormatterHelper { + + private val formatter = buildFormatter(pattern, locale) + private val UTC = ZoneId.of("UTC") + + private def toInstant(s: String): Instant = { + val temporalAccessor = formatter.parse(s) + toInstantWithZoneId(temporalAccessor, UTC) + } + + override def parse(s: String): Int = { + val seconds = toInstant(s).getEpochSecond + val days = Math.floorDiv(seconds, DateTimeUtils.SECONDS_PER_DAY) + days.toInt + } + + override def format(days: Int): String = { + val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY) + formatter.withZone(UTC).format(instant) + } +} + +class LegacyDateFormatter(pattern: String, locale: Locale) extends DateFormatter { + private val format = FastDateFormat.getInstance(pattern, locale) + + override def parse(s: String): Int = { + val milliseconds = format.parse(s).getTime + DateTimeUtils.millisToDays(milliseconds) + } + + override def format(days: Int): String = { + val date = DateTimeUtils.toJavaDate(days) + format.format(date) + } +} + +class LegacyFallbackDateFormatter( + pattern: String, + locale: Locale) extends LegacyDateFormatter(pattern, locale) { + override def parse(s: String): Int = { + Try(super.parse(s)).orElse { + // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards + // compatibility. + Try(DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(s).getTime)) + }.getOrElse { + // In Spark 1.5.0, we store the data as number of days since epoch in string. + // So, we just convert it to Int. + s.toInt + } + } +} + +object DateFormatter { + def apply(format: String, locale: Locale): DateFormatter = { + if (SQLConf.get.legacyTimeParserEnabled) { + new LegacyFallbackDateFormatter(format, locale) + } else { + new Iso8601DateFormatter(format, locale) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala new file mode 100644 index 000000000000..b85101d38d9e --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.util + +import java.time.{Instant, LocalDateTime, ZonedDateTime, ZoneId} +import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder} +import java.time.temporal.{ChronoField, TemporalAccessor} +import java.util.Locale + +trait DateTimeFormatterHelper { + + protected def buildFormatter(pattern: String, locale: Locale): DateTimeFormatter = { + new DateTimeFormatterBuilder() + .appendPattern(pattern) + .parseDefaulting(ChronoField.YEAR_OF_ERA, 1970) + .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) + .parseDefaulting(ChronoField.DAY_OF_MONTH, 1) + .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) + .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) + .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) + .toFormatter(locale) + } + + protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, zoneId: ZoneId): Instant = { + val localDateTime = LocalDateTime.from(temporalAccessor) + val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId) + Instant.from(zonedDateTime) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 2b8d22dde926..eb1303303463 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql.catalyst.util import java.time._ -import java.time.format.DateTimeFormatterBuilder -import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} +import java.time.temporal.TemporalQueries import java.util.{Locale, TimeZone} import scala.util.Try @@ -33,39 +32,16 @@ sealed trait TimestampFormatter { def format(us: Long): String } -trait FormatterUtils { - protected def zoneId: ZoneId - protected def buildFormatter( - pattern: String, - locale: Locale): java.time.format.DateTimeFormatter = { - new DateTimeFormatterBuilder() - .appendPattern(pattern) - .parseDefaulting(ChronoField.YEAR_OF_ERA, 1970) - .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) - .parseDefaulting(ChronoField.DAY_OF_MONTH, 1) - .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) - .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) - .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) - .toFormatter(locale) - } - protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor): java.time.Instant = { - val localDateTime = LocalDateTime.from(temporalAccessor) - val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId) - Instant.from(zonedDateTime) - } -} - class Iso8601TimestampFormatter( pattern: String, timeZone: TimeZone, - locale: Locale) extends TimestampFormatter with FormatterUtils { - val zoneId = timeZone.toZoneId - val formatter = buildFormatter(pattern, locale) + locale: Locale) extends TimestampFormatter with DateTimeFormatterHelper { + private val formatter = buildFormatter(pattern, locale) - def toInstant(s: String): Instant = { + private def toInstant(s: String): Instant = { val temporalAccessor = formatter.parse(s) if (temporalAccessor.query(TemporalQueries.offset()) == null) { - toInstantWithZoneId(temporalAccessor) + toInstantWithZoneId(temporalAccessor, timeZone.toZoneId) } else { Instant.from(temporalAccessor) } @@ -77,9 +53,9 @@ class Iso8601TimestampFormatter( result } - def parse(s: String): Long = instantToMicros(toInstant(s)) + override def parse(s: String): Long = instantToMicros(toInstant(s)) - def format(us: 
Long): String = { + override def format(us: Long): String = { val secs = Math.floorDiv(us, DateTimeUtils.MICROS_PER_SECOND) val mos = Math.floorMod(us, DateTimeUtils.MICROS_PER_SECOND) val instant = Instant.ofEpochSecond(secs, mos * DateTimeUtils.NANOS_PER_MICROS) @@ -92,13 +68,13 @@ class LegacyTimestampFormatter( pattern: String, timeZone: TimeZone, locale: Locale) extends TimestampFormatter { - val format = FastDateFormat.getInstance(pattern, timeZone, locale) + private val format = FastDateFormat.getInstance(pattern, timeZone, locale) protected def toMillis(s: String): Long = format.parse(s).getTime - def parse(s: String): Long = toMillis(s) * DateTimeUtils.MICROS_PER_MILLIS + override def parse(s: String): Long = toMillis(s) * DateTimeUtils.MICROS_PER_MILLIS - def format(us: Long): String = { + override def format(us: Long): String = { format.format(DateTimeUtils.toJavaTimestamp(us)) } } @@ -121,74 +97,3 @@ object TimestampFormatter { } } } - -sealed trait DateFormatter { - def parse(s: String): Int // returns days since epoch - def format(days: Int): String -} - -class Iso8601DateFormatter( - pattern: String, - locale: Locale) extends DateFormatter with FormatterUtils { - - val zoneId = ZoneId.of("UTC") - - val formatter = buildFormatter(pattern, locale) - - def toInstant(s: String): Instant = { - val temporalAccessor = formatter.parse(s) - toInstantWithZoneId(temporalAccessor) - } - - override def parse(s: String): Int = { - val seconds = toInstant(s).getEpochSecond - val days = Math.floorDiv(seconds, DateTimeUtils.SECONDS_PER_DAY) - - days.toInt - } - - override def format(days: Int): String = { - val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY) - formatter.withZone(zoneId).format(instant) - } -} - -class LegacyDateFormatter(pattern: String, locale: Locale) extends DateFormatter { - val format = FastDateFormat.getInstance(pattern, locale) - - def parse(s: String): Int = { - val milliseconds = format.parse(s).getTime - DateTimeUtils.millisToDays(milliseconds) - } - - def format(days: Int): String = { - val date = DateTimeUtils.toJavaDate(days) - format.format(date) - } -} - -class LegacyFallbackDateFormatter( - pattern: String, - locale: Locale) extends LegacyDateFormatter(pattern, locale) { - override def parse(s: String): Int = { - Try(super.parse(s)).orElse { - // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards - // compatibility. - Try(DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(s).getTime)) - }.getOrElse { - // In Spark 1.5.0, we store the data as number of days since epoch in string. - // So, we just convert it to Int. - s.toInt - } - } -} - -object DateFormatter { - def apply(format: String, locale: Locale): DateFormatter = { - if (SQLConf.get.legacyTimeParserEnabled) { - new LegacyFallbackDateFormatter(format, locale) - } else { - new Iso8601DateFormatter(format, locale) - } - } -} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala new file mode 100644 index 000000000000..019615b81101 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.util + +import java.util.Locale + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.internal.SQLConf + +class DateFormatterSuite extends SparkFunSuite with SQLHelper { + test("parsing dates") { + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + val formatter = DateFormatter("yyyy-MM-dd", Locale.US) + val daysSinceEpoch = formatter.parse("2018-12-02") + assert(daysSinceEpoch === 17867) + } + } + } + + test("format dates") { + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + val formatter = DateFormatter("yyyy-MM-dd", Locale.US) + val date = formatter.format(17867) + assert(date === "2018-12-02") + } + } + } + + test("roundtrip date -> days -> date") { + Seq( + "0050-01-01", + "0953-02-02", + "1423-03-08", + "1969-12-31", + "1972-08-25", + "1975-09-26", + "2018-12-12", + "2038-01-01", + "5010-11-17").foreach { date => + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + val formatter = DateFormatter("yyyy-MM-dd", Locale.US) + val days = formatter.parse(date) + val formatted = formatter.format(days) + assert(date === formatted) + } + } + } + } + + test("roundtrip days -> date -> days") { + Seq( + -701265, + -371419, + -199722, + -1, + 0, + 967, + 2094, + 17877, + 24837, + 1110657).foreach { days => + DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { + val formatter = DateFormatter("yyyy-MM-dd", Locale.US) + val date = formatter.format(days) + val parsed = formatter.parse(date) + assert(days === parsed) + } + } + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala similarity index 66% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimestampFormatterSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala index 43e348c7eebf..c110ffa01f73 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateTimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala @@ -21,19 +21,9 @@ import java.util.{Locale, TimeZone} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.plans.SQLHelper -import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, TimestampFormatter} -class DateTimestampFormatterSuite extends SparkFunSuite with SQLHelper { - test("parsing dates") { - 
DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => - withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { - val formatter = DateFormatter("yyyy-MM-dd", Locale.US) - val daysSinceEpoch = formatter.parse("2018-12-02") - assert(daysSinceEpoch === 17867) - } - } - } +class TimestampFormatterSuite extends SparkFunSuite with SQLHelper { test("parsing timestamps using time zones") { val localDate = "2018-12-02T10:11:12.001234" @@ -56,16 +46,6 @@ class DateTimestampFormatterSuite extends SparkFunSuite with SQLHelper { } } - test("format dates") { - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => - withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { - val formatter = DateFormatter("yyyy-MM-dd", Locale.US) - val date = formatter.format(17867) - assert(date === "2018-12-02") - } - } - } - test("format timestamps using time zones") { val microsSinceEpoch = 1543745472001234L val expectedTimestamp = Map( @@ -87,7 +67,7 @@ class DateTimestampFormatterSuite extends SparkFunSuite with SQLHelper { } } - test("roundtrip timestamp -> micros -> timestamp using timezones") { + test("roundtrip micros -> timestamp -> micros using timezones") { Seq( -58710115316212000L, -18926315945345679L, @@ -107,7 +87,7 @@ class DateTimestampFormatterSuite extends SparkFunSuite with SQLHelper { } } - test("roundtrip micros -> timestamp -> micros using timezones") { + test("roundtrip timestamp -> micros -> timestamp using timezones") { Seq( "0109-07-20T18:38:03.788000", "1370-04-01T10:00:54.654321", @@ -126,49 +106,4 @@ class DateTimestampFormatterSuite extends SparkFunSuite with SQLHelper { } } } - - test("roundtrip date -> days -> date") { - Seq( - "0050-01-01", - "0953-02-02", - "1423-03-08", - "1969-12-31", - "1972-08-25", - "1975-09-26", - "2018-12-12", - "2038-01-01", - "5010-11-17").foreach { date => - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => - withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { - val formatter = DateFormatter("yyyy-MM-dd", Locale.US) - val days = formatter.parse(date) - val formatted = formatter.format(days) - assert(date === formatted) - } - } - } - } - - test("roundtrip days -> date -> days") { - Seq( - -701265, - -371419, - -199722, - -1, - 0, - 967, - 2094, - 17877, - 24837, - 1110657).foreach { days => - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => - withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { - val formatter = DateFormatter("yyyy-MM-dd", Locale.US) - val date = formatter.format(days) - val parsed = formatter.parse(date) - assert(days === parsed) - } - } - } - } } From c04ad17ccf14a07ffdb2bf637124492a341075f2 Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Mon, 17 Dec 2018 09:28:23 -0600 Subject: [PATCH 0151/1072] [SPARK-20351][ML] Add trait hasTrainingSummary to replace the duplicate code ## What changes were proposed in this pull request? Add a trait HasTrainingSummary to avoid code duplicate related to training summary. Currently all the training summary use the similar pattern which can be generalized, ``` private[ml] final var trainingSummary: Option[T] = None def hasSummary: Boolean = trainingSummary.isDefined def summary: T = trainingSummary.getOrElse... private[ml] def setSummary(summary: Option[T]): ... ``` Classes with the trait need to override `setSummry`. And for Java compatibility, they will also have to override `summary` method, otherwise the java code will regard all the summary class as Object due to a known issue with Scala. ## How was this patch tested? 
existing Java and Scala unit tests Closes #17654 from hhbyyh/hassummary. Authored-by: Yuhao Yang Signed-off-by: Sean Owen --- .../classification/LogisticRegression.scala | 24 ++------- .../spark/ml/clustering/BisectingKMeans.scala | 25 ++------- .../spark/ml/clustering/GaussianMixture.scala | 24 ++------- .../apache/spark/ml/clustering/KMeans.scala | 23 ++------ .../GeneralizedLinearRegression.scala | 22 ++------ .../ml/regression/LinearRegression.scala | 21 ++------ .../spark/ml/util/HasTrainingSummary.scala | 52 +++++++++++++++++++ 7 files changed, 78 insertions(+), 113 deletions(-) create mode 100644 mllib/src/main/scala/org/apache/spark/ml/util/HasTrainingSummary.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 27a7db0b2f5d..f2a5c11a3486 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -934,8 +934,8 @@ class LogisticRegressionModel private[spark] ( @Since("2.1.0") val interceptVector: Vector, @Since("1.3.0") override val numClasses: Int, private val isMultinomial: Boolean) - extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel] - with LogisticRegressionParams with MLWritable { + extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel] with MLWritable + with LogisticRegressionParams with HasTrainingSummary[LogisticRegressionTrainingSummary] { require(coefficientMatrix.numRows == interceptVector.size, s"Dimension mismatch! Expected " + s"coefficientMatrix.numRows == interceptVector.size, but ${coefficientMatrix.numRows} != " + @@ -1018,20 +1018,16 @@ class LogisticRegressionModel private[spark] ( @Since("1.6.0") override val numFeatures: Int = coefficientMatrix.numCols - private var trainingSummary: Option[LogisticRegressionTrainingSummary] = None - /** * Gets summary of model on training set. An exception is thrown - * if `trainingSummary == None`. + * if `hasSummary` is false. */ @Since("1.5.0") - def summary: LogisticRegressionTrainingSummary = trainingSummary.getOrElse { - throw new SparkException("No training summary available for this LogisticRegressionModel") - } + override def summary: LogisticRegressionTrainingSummary = super.summary /** * Gets summary of model on training set. An exception is thrown - * if `trainingSummary == None` or it is a multiclass model. + * if `hasSummary` is false or it is a multiclass model. */ @Since("2.3.0") def binarySummary: BinaryLogisticRegressionTrainingSummary = summary match { @@ -1062,16 +1058,6 @@ class LogisticRegressionModel private[spark] ( (model, model.getProbabilityCol, model.getPredictionCol) } - private[classification] - def setSummary(summary: Option[LogisticRegressionTrainingSummary]): this.type = { - this.trainingSummary = summary - this - } - - /** Indicates whether a training summary exists for this model instance. */ - @Since("1.5.0") - def hasSummary: Boolean = trainingSummary.isDefined - /** * Evaluates the model on a test dataset. 
* diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index 1a94aefa3f56..49e9f5136813 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -87,8 +87,9 @@ private[clustering] trait BisectingKMeansParams extends Params with HasMaxIter @Since("2.0.0") class BisectingKMeansModel private[ml] ( @Since("2.0.0") override val uid: String, - private val parentModel: MLlibBisectingKMeansModel - ) extends Model[BisectingKMeansModel] with BisectingKMeansParams with MLWritable { + private val parentModel: MLlibBisectingKMeansModel) + extends Model[BisectingKMeansModel] with BisectingKMeansParams with MLWritable + with HasTrainingSummary[BisectingKMeansSummary] { @Since("2.0.0") override def copy(extra: ParamMap): BisectingKMeansModel = { @@ -143,28 +144,12 @@ class BisectingKMeansModel private[ml] ( @Since("2.0.0") override def write: MLWriter = new BisectingKMeansModel.BisectingKMeansModelWriter(this) - private var trainingSummary: Option[BisectingKMeansSummary] = None - - private[clustering] def setSummary(summary: Option[BisectingKMeansSummary]): this.type = { - this.trainingSummary = summary - this - } - - /** - * Return true if there exists summary of model. - */ - @Since("2.1.0") - def hasSummary: Boolean = trainingSummary.nonEmpty - /** * Gets summary of model on training set. An exception is - * thrown if `trainingSummary == None`. + * thrown if `hasSummary` is false. */ @Since("2.1.0") - def summary: BisectingKMeansSummary = trainingSummary.getOrElse { - throw new SparkException( - s"No training summary available for the ${this.getClass.getSimpleName}") - } + override def summary: BisectingKMeansSummary = super.summary } object BisectingKMeansModel extends MLReadable[BisectingKMeansModel] { diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 88abc1605d69..bb10b3228b93 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -85,7 +85,8 @@ class GaussianMixtureModel private[ml] ( @Since("2.0.0") override val uid: String, @Since("2.0.0") val weights: Array[Double], @Since("2.0.0") val gaussians: Array[MultivariateGaussian]) - extends Model[GaussianMixtureModel] with GaussianMixtureParams with MLWritable { + extends Model[GaussianMixtureModel] with GaussianMixtureParams with MLWritable + with HasTrainingSummary[GaussianMixtureSummary] { /** @group setParam */ @Since("2.1.0") @@ -160,28 +161,13 @@ class GaussianMixtureModel private[ml] ( @Since("2.0.0") override def write: MLWriter = new GaussianMixtureModel.GaussianMixtureModelWriter(this) - private var trainingSummary: Option[GaussianMixtureSummary] = None - - private[clustering] def setSummary(summary: Option[GaussianMixtureSummary]): this.type = { - this.trainingSummary = summary - this - } - - /** - * Return true if there exists summary of model. - */ - @Since("2.0.0") - def hasSummary: Boolean = trainingSummary.nonEmpty - /** * Gets summary of model on training set. An exception is - * thrown if `trainingSummary == None`. + * thrown if `hasSummary` is false. 
*/ @Since("2.0.0") - def summary: GaussianMixtureSummary = trainingSummary.getOrElse { - throw new RuntimeException( - s"No training summary available for the ${this.getClass.getSimpleName}") - } + override def summary: GaussianMixtureSummary = super.summary + } @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index 2eed84d51782..319747d4a193 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -107,7 +107,8 @@ private[clustering] trait KMeansParams extends Params with HasMaxIter with HasFe class KMeansModel private[ml] ( @Since("1.5.0") override val uid: String, private[clustering] val parentModel: MLlibKMeansModel) - extends Model[KMeansModel] with KMeansParams with GeneralMLWritable { + extends Model[KMeansModel] with KMeansParams with GeneralMLWritable + with HasTrainingSummary[KMeansSummary] { @Since("1.5.0") override def copy(extra: ParamMap): KMeansModel = { @@ -153,28 +154,12 @@ class KMeansModel private[ml] ( @Since("1.6.0") override def write: GeneralMLWriter = new GeneralMLWriter(this) - private var trainingSummary: Option[KMeansSummary] = None - - private[clustering] def setSummary(summary: Option[KMeansSummary]): this.type = { - this.trainingSummary = summary - this - } - - /** - * Return true if there exists summary of model. - */ - @Since("2.0.0") - def hasSummary: Boolean = trainingSummary.nonEmpty - /** * Gets summary of model on training set. An exception is - * thrown if `trainingSummary == None`. + * thrown if `hasSummary` is false. */ @Since("2.0.0") - def summary: KMeansSummary = trainingSummary.getOrElse { - throw new SparkException( - s"No training summary available for the ${this.getClass.getSimpleName}") - } + override def summary: KMeansSummary = super.summary } /** Helper class for storing model data */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index abb60ea20575..885b13bf8dac 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -1001,7 +1001,8 @@ class GeneralizedLinearRegressionModel private[ml] ( @Since("2.0.0") val coefficients: Vector, @Since("2.0.0") val intercept: Double) extends RegressionModel[Vector, GeneralizedLinearRegressionModel] - with GeneralizedLinearRegressionBase with MLWritable { + with GeneralizedLinearRegressionBase with MLWritable + with HasTrainingSummary[GeneralizedLinearRegressionTrainingSummary] { /** * Sets the link prediction (linear predictor) column name. @@ -1054,29 +1055,12 @@ class GeneralizedLinearRegressionModel private[ml] ( output.toDF() } - private var trainingSummary: Option[GeneralizedLinearRegressionTrainingSummary] = None - /** * Gets R-like summary of model on training set. An exception is * thrown if there is no summary available. */ @Since("2.0.0") - def summary: GeneralizedLinearRegressionTrainingSummary = trainingSummary.getOrElse { - throw new SparkException( - "No training summary available for this GeneralizedLinearRegressionModel") - } - - /** - * Indicates if [[summary]] is available. 
- */ - @Since("2.0.0") - def hasSummary: Boolean = trainingSummary.nonEmpty - - private[regression] - def setSummary(summary: Option[GeneralizedLinearRegressionTrainingSummary]): this.type = { - this.trainingSummary = summary - this - } + override def summary: GeneralizedLinearRegressionTrainingSummary = super.summary /** * Evaluate the model on the given dataset, returning a summary of the results. diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index ce6c12cc368d..197828762d16 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -647,33 +647,20 @@ class LinearRegressionModel private[ml] ( @Since("1.3.0") val intercept: Double, @Since("2.3.0") val scale: Double) extends RegressionModel[Vector, LinearRegressionModel] - with LinearRegressionParams with GeneralMLWritable { + with LinearRegressionParams with GeneralMLWritable + with HasTrainingSummary[LinearRegressionTrainingSummary] { private[ml] def this(uid: String, coefficients: Vector, intercept: Double) = this(uid, coefficients, intercept, 1.0) - private var trainingSummary: Option[LinearRegressionTrainingSummary] = None - override val numFeatures: Int = coefficients.size /** * Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is - * thrown if `trainingSummary == None`. + * thrown if `hasSummary` is false. */ @Since("1.5.0") - def summary: LinearRegressionTrainingSummary = trainingSummary.getOrElse { - throw new SparkException("No training summary available for this LinearRegressionModel") - } - - private[regression] - def setSummary(summary: Option[LinearRegressionTrainingSummary]): this.type = { - this.trainingSummary = summary - this - } - - /** Indicates whether a training summary exists for this model instance. */ - @Since("1.5.0") - def hasSummary: Boolean = trainingSummary.isDefined + override def summary: LinearRegressionTrainingSummary = super.summary /** * Evaluates the model on a test dataset. diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/HasTrainingSummary.scala b/mllib/src/main/scala/org/apache/spark/ml/util/HasTrainingSummary.scala new file mode 100644 index 000000000000..edb0208144e1 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/util/HasTrainingSummary.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.util + +import org.apache.spark.SparkException +import org.apache.spark.annotation.Since + + +/** + * Trait for models that provides Training summary. 
+ * + * @tparam T Summary instance type + */ +@Since("3.0.0") +private[ml] trait HasTrainingSummary[T] { + + private[ml] final var trainingSummary: Option[T] = None + + /** Indicates whether a training summary exists for this model instance. */ + @Since("3.0.0") + def hasSummary: Boolean = trainingSummary.isDefined + + /** + * Gets summary of model on training set. An exception is + * thrown if if `hasSummary` is false. + */ + @Since("3.0.0") + def summary: T = trainingSummary.getOrElse { + throw new SparkException( + s"No training summary available for this ${this.getClass.getSimpleName}") + } + + private[ml] def setSummary(summary: Option[T]): this.type = { + this.trainingSummary = summary + this + } +} From 6d45e6ea1507943f6ee833af8ad7969294b0356a Mon Sep 17 00:00:00 2001 From: chakravarthi Date: Mon, 17 Dec 2018 09:46:50 -0800 Subject: [PATCH 0152/1072] [SPARK-26255][YARN] Apply user provided UI filters to SQL tab in yarn mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? User specified filters are not applied to SQL tab in yarn mode, as it is overridden by the yarn AmIp filter. So we need to append user provided filters (spark.ui.filters) with yarn filter. ## How was this patch tested? 【Test step】: 1) Launch spark sql with authentication filter as below: 2) spark-sql --master yarn --conf spark.ui.filters=org.apache.hadoop.security.authentication.server.AuthenticationFilter --conf spark.org.apache.hadoop.security.authentication.server.AuthenticationFilter.params="type=simple" 3) Go to Yarn application list UI link 4) Launch the application master for the Spark-SQL app ID and access all the tabs by appending tab name. 5) It will display an error for all tabs including SQL tab.(before able to access SQL tab,as Authentication filter is not applied for SQL tab) 6) Also can be verified with info logs,that Authentication filter applied to SQL tab.(before it is not applied). I have attached the behaviour below in following order.. 1) Command used 2) Before fix (logs and UI) 3) After fix (logs and UI) **1) COMMAND USED**: launching spark-sql with authentication filter. ![image](https://user-images.githubusercontent.com/45845595/49947295-e7e97400-ff16-11e8-8c9a-10659487ddee.png) **2) BEFORE FIX:** **UI result:** able to access SQL tab. ![image](https://user-images.githubusercontent.com/45845595/49948398-62b38e80-ff19-11e8-95dc-e74f9e3c2ba7.png) **logs**: authentication filter not applied to SQL tab. ![image](https://user-images.githubusercontent.com/45845595/49947343-ff286180-ff16-11e8-9de0-3f8db140bc32.png) **3) AFTER FIX:** **UI result**: Not able to access SQL tab. ![image](https://user-images.githubusercontent.com/45845595/49947360-0d767d80-ff17-11e8-9e9e-a95311949164.png) **in logs**: Both yarn filter and Authentication filter applied to SQL tab. ![image](https://user-images.githubusercontent.com/45845595/49947377-1a936c80-ff17-11e8-9f44-700eb3dc0ded.png) Closes #23312 from chakravarthiT/SPARK-26255_ui. 
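Concretely, the appended configuration this change produces can be sketched as below. The AuthenticationFilter class name comes from the reproduction steps above; the AmIp filter class name is the usual Hadoop one and is shown here only for illustration, since the real value is passed in by the YARN AM at runtime.

```scala
import org.apache.spark.SparkConf

// User-supplied filter, as in the reproduction steps above.
val conf = new SparkConf()
  .set("spark.ui.filters",
    "org.apache.hadoop.security.authentication.server.AuthenticationFilter")

// Filter injected for the YARN AM proxy; appended to, not replacing, the user's filters.
val amIpFilter = "org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter"
val allFilters = amIpFilter + "," + conf.get("spark.ui.filters", "")
conf.set("spark.ui.filters", allFilters)

// Every UI handler, including the SQL tab, now picks up both filters.
println(conf.get("spark.ui.filters"))
```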
Authored-by: chakravarthi Signed-off-by: Marcelo Vanzin --- .../apache/spark/scheduler/cluster/YarnSchedulerBackend.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala index 67c36aac4926..1289d4be79ea 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -168,8 +168,10 @@ private[spark] abstract class YarnSchedulerBackend( filterName != null && filterName.nonEmpty && filterParams != null && filterParams.nonEmpty if (hasFilter) { + // SPARK-26255: Append user provided filters(spark.ui.filters) with yarn filter. + val allFilters = filterName + "," + conf.get("spark.ui.filters", "") logInfo(s"Add WebUI Filter. $filterName, $filterParams, $proxyBase") - conf.set("spark.ui.filters", filterName) + conf.set("spark.ui.filters", allFilters) filterParams.foreach { case (k, v) => conf.set(s"spark.$filterName.param.$k", v) } scheduler.sc.ui.foreach { ui => JettyUtils.addFilters(ui.getHandlers, conf) } } From 5a116e669cb196f59ab3f8d06477f675cd0400f9 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Mon, 17 Dec 2018 10:07:35 -0800 Subject: [PATCH 0153/1072] [SPARK-26371][SS] Increase kafka ConfigUpdater test coverage. ## What changes were proposed in this pull request? As Kafka delegation token added logic into ConfigUpdater it would be good to test it. This PR contains the following changes: * ConfigUpdater extracted to a separate file and renamed to KafkaConfigUpdater * mockito-core dependency added to kafka-0-10-sql * Unit tests added ## How was this patch tested? Existing + new unit tests + on cluster. Closes #23321 from gaborgsomogyi/SPARK-26371. 
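Before the diff, a rough sketch of how the extracted builder is meant to be used (modeled on the KafkaSourceProvider call sites changed below; the object name is invented, and since KafkaConfigUpdater is private[kafka010] this only compiles inside that package):

```scala
package org.apache.spark.sql.kafka010

import java.{util => ju}

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.ByteArrayDeserializer

// Hypothetical helper object, for illustration only.
object KafkaConfigUpdaterUsageSketch {
  // Builds a driver-side consumer config the same way KafkaSourceProvider does:
  // fixed byte-array deserializers, then token/JAAS handling via
  // setAuthenticationConfigIfNeeded(), and finally a java.util.Map for the Kafka client.
  def driverParams(specifiedKafkaParams: Map[String, String]): ju.Map[String, Object] =
    KafkaConfigUpdater("source", specifiedKafkaParams)
      .set(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[ByteArrayDeserializer].getName)
      .set(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[ByteArrayDeserializer].getName)
      .setAuthenticationConfigIfNeeded()
      .build()
}
```

The new unit tests below exercise exactly this API surface: set, setIfUnset, and the three authentication paths (global JAAS configuration, delegation token, and no security).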
Authored-by: Gabor Somogyi Signed-off-by: Dongjoon Hyun --- external/kafka-0-10-sql/pom.xml | 5 + .../sql/kafka010/KafkaConfigUpdater.scala | 74 ++++++++++++ .../sql/kafka010/KafkaSourceProvider.scala | 52 +------- .../kafka010/KafkaConfigUpdaterSuite.scala | 113 ++++++++++++++++++ .../kafka010/KafkaDelegationTokenTest.scala | 90 ++++++++++++++ .../kafka010/KafkaSecurityHelperSuite.scala | 46 +------ 6 files changed, 287 insertions(+), 93 deletions(-) create mode 100644 external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaConfigUpdater.scala create mode 100644 external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaConfigUpdaterSuite.scala create mode 100644 external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenTest.scala diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index de8731c4b774..1c77906f43b1 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -106,6 +106,11 @@ ${jetty.version} test + + org.mockito + mockito-core + test + org.scalacheck scalacheck_${scala.binary.version} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaConfigUpdater.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaConfigUpdater.scala new file mode 100644 index 000000000000..bc1b8019f6a6 --- /dev/null +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaConfigUpdater.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.kafka010 + +import java.{util => ju} + +import scala.collection.JavaConverters._ + +import org.apache.kafka.common.config.SaslConfigs + +import org.apache.spark.SparkEnv +import org.apache.spark.deploy.security.KafkaTokenUtil +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.Kafka + +/** + * Class to conveniently update Kafka config params, while logging the changes + */ +private[kafka010] case class KafkaConfigUpdater(module: String, kafkaParams: Map[String, String]) + extends Logging { + private val map = new ju.HashMap[String, Object](kafkaParams.asJava) + + def set(key: String, value: Object): this.type = { + map.put(key, value) + logDebug(s"$module: Set $key to $value, earlier value: ${kafkaParams.getOrElse(key, "")}") + this + } + + def setIfUnset(key: String, value: Object): this.type = { + if (!map.containsKey(key)) { + map.put(key, value) + logDebug(s"$module: Set $key to $value") + } + this + } + + def setAuthenticationConfigIfNeeded(): this.type = { + // There are multiple possibilities to log in and applied in the following order: + // - JVM global security provided -> try to log in with JVM global security configuration + // which can be configured for example with 'java.security.auth.login.config'. + // For this no additional parameter needed. + // - Token is provided -> try to log in with scram module using kafka's dynamic JAAS + // configuration. + if (KafkaTokenUtil.isGlobalJaasConfigurationProvided) { + logDebug("JVM global security configuration detected, using it for login.") + } else if (KafkaSecurityHelper.isTokenAvailable()) { + logDebug("Delegation token detected, using it for login.") + val jaasParams = KafkaSecurityHelper.getTokenJaasParams(SparkEnv.get.conf) + set(SaslConfigs.SASL_JAAS_CONFIG, jaasParams) + val mechanism = SparkEnv.get.conf.get(Kafka.TOKEN_SASL_MECHANISM) + require(mechanism.startsWith("SCRAM"), + "Delegation token works only with SCRAM mechanism.") + set(SaslConfigs.SASL_MECHANISM, mechanism) + } + this + } + + def build(): ju.Map[String, Object] = map +} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index 4b8b5c0019b4..5774ee7a1c94 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -24,13 +24,9 @@ import scala.collection.JavaConverters._ import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.clients.producer.ProducerConfig -import org.apache.kafka.common.config.SaslConfigs import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer} -import org.apache.spark.SparkEnv -import org.apache.spark.deploy.security.KafkaTokenUtil import org.apache.spark.internal.Logging -import org.apache.spark.internal.config._ import org.apache.spark.sql.{AnalysisException, DataFrame, SaveMode, SQLContext} import org.apache.spark.sql.execution.streaming.{Sink, Source} import org.apache.spark.sql.sources._ @@ -483,7 +479,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { } def kafkaParamsForDriver(specifiedKafkaParams: Map[String, String]): ju.Map[String, Object] = - ConfigUpdater("source", specifiedKafkaParams) + KafkaConfigUpdater("source", specifiedKafkaParams) .set(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, 
deserClassName) .set(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, deserClassName) @@ -506,7 +502,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { def kafkaParamsForExecutors( specifiedKafkaParams: Map[String, String], uniqueGroupId: String): ju.Map[String, Object] = - ConfigUpdater("executor", specifiedKafkaParams) + KafkaConfigUpdater("executor", specifiedKafkaParams) .set(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, deserClassName) .set(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, deserClassName) @@ -537,48 +533,6 @@ private[kafka010] object KafkaSourceProvider extends Logging { s"${groupIdPrefix}-${UUID.randomUUID}-${metadataPath.hashCode}" } - /** Class to conveniently update Kafka config params, while logging the changes */ - private case class ConfigUpdater(module: String, kafkaParams: Map[String, String]) { - private val map = new ju.HashMap[String, Object](kafkaParams.asJava) - - def set(key: String, value: Object): this.type = { - map.put(key, value) - logDebug(s"$module: Set $key to $value, earlier value: ${kafkaParams.getOrElse(key, "")}") - this - } - - def setIfUnset(key: String, value: Object): ConfigUpdater = { - if (!map.containsKey(key)) { - map.put(key, value) - logDebug(s"$module: Set $key to $value") - } - this - } - - def setAuthenticationConfigIfNeeded(): ConfigUpdater = { - // There are multiple possibilities to log in and applied in the following order: - // - JVM global security provided -> try to log in with JVM global security configuration - // which can be configured for example with 'java.security.auth.login.config'. - // For this no additional parameter needed. - // - Token is provided -> try to log in with scram module using kafka's dynamic JAAS - // configuration. - if (KafkaTokenUtil.isGlobalJaasConfigurationProvided) { - logDebug("JVM global security configuration detected, using it for login.") - } else if (KafkaSecurityHelper.isTokenAvailable()) { - logDebug("Delegation token detected, using it for login.") - val jaasParams = KafkaSecurityHelper.getTokenJaasParams(SparkEnv.get.conf) - set(SaslConfigs.SASL_JAAS_CONFIG, jaasParams) - val mechanism = SparkEnv.get.conf.get(Kafka.TOKEN_SASL_MECHANISM) - require(mechanism.startsWith("SCRAM"), - "Delegation token works only with SCRAM mechanism.") - set(SaslConfigs.SASL_MECHANISM, mechanism) - } - this - } - - def build(): ju.Map[String, Object] = map - } - private[kafka010] def kafkaParamsForProducer( parameters: Map[String, String]): ju.Map[String, Object] = { val caseInsensitiveParams = parameters.map { case (k, v) => (k.toLowerCase(Locale.ROOT), v) } @@ -596,7 +550,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { val specifiedKafkaParams = convertToSpecifiedParams(parameters) - ConfigUpdater("executor", specifiedKafkaParams) + KafkaConfigUpdater("executor", specifiedKafkaParams) .set(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, serClassName) .set(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, serClassName) .setAuthenticationConfigIfNeeded() diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaConfigUpdaterSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaConfigUpdaterSuite.scala new file mode 100644 index 000000000000..25ccca3cb984 --- /dev/null +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaConfigUpdaterSuite.scala @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.kafka010 + +import org.apache.kafka.common.config.SaslConfigs + +import org.apache.spark.SparkFunSuite +import org.apache.spark.internal.config._ + +class KafkaConfigUpdaterSuite extends SparkFunSuite with KafkaDelegationTokenTest { + private val testModule = "testModule" + private val testKey = "testKey" + private val testValue = "testValue" + private val otherTestValue = "otherTestValue" + + test("set should always set value") { + val params = Map.empty[String, String] + + val updatedParams = KafkaConfigUpdater(testModule, params) + .set(testKey, testValue) + .build() + + assert(updatedParams.size() === 1) + assert(updatedParams.get(testKey) === testValue) + } + + test("setIfUnset without existing key should set value") { + val params = Map.empty[String, String] + + val updatedParams = KafkaConfigUpdater(testModule, params) + .setIfUnset(testKey, testValue) + .build() + + assert(updatedParams.size() === 1) + assert(updatedParams.get(testKey) === testValue) + } + + test("setIfUnset with existing key should not set value") { + val params = Map[String, String](testKey -> testValue) + + val updatedParams = KafkaConfigUpdater(testModule, params) + .setIfUnset(testKey, otherTestValue) + .build() + + assert(updatedParams.size() === 1) + assert(updatedParams.get(testKey) === testValue) + } + + test("setAuthenticationConfigIfNeeded with global security should not set values") { + val params = Map.empty[String, String] + setGlobalKafkaClientConfig() + + val updatedParams = KafkaConfigUpdater(testModule, params) + .setAuthenticationConfigIfNeeded() + .build() + + assert(updatedParams.size() === 0) + } + + test("setAuthenticationConfigIfNeeded with token should set values") { + val params = Map.empty[String, String] + setSparkEnv(Map.empty) + addTokenToUGI() + + val updatedParams = KafkaConfigUpdater(testModule, params) + .setAuthenticationConfigIfNeeded() + .build() + + assert(updatedParams.size() === 2) + assert(updatedParams.containsKey(SaslConfigs.SASL_JAAS_CONFIG)) + assert(updatedParams.get(SaslConfigs.SASL_MECHANISM) === + Kafka.TOKEN_SASL_MECHANISM.defaultValueString) + } + + test("setAuthenticationConfigIfNeeded with token and invalid mechanism should throw exception") { + val params = Map.empty[String, String] + setSparkEnv(Map[String, String](Kafka.TOKEN_SASL_MECHANISM.key -> "INVALID")) + addTokenToUGI() + + val e = intercept[IllegalArgumentException] { + KafkaConfigUpdater(testModule, params) + .setAuthenticationConfigIfNeeded() + .build() + } + + assert(e.getMessage.contains("Delegation token works only with SCRAM mechanism.")) + } + + test("setAuthenticationConfigIfNeeded without security should not set values") { + val params = Map.empty[String, String] + + val updatedParams = KafkaConfigUpdater(testModule, params) + .setAuthenticationConfigIfNeeded() + .build() + 
+ assert(updatedParams.size() === 0) + } +} diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenTest.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenTest.scala new file mode 100644 index 000000000000..1899c65c721b --- /dev/null +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenTest.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.kafka010 + +import java.{util => ju} +import javax.security.auth.login.{AppConfigurationEntry, Configuration} + +import org.apache.hadoop.security.{Credentials, UserGroupInformation} +import org.apache.hadoop.security.token.Token +import org.mockito.Mockito.{doReturn, mock} +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.{SparkConf, SparkEnv, SparkFunSuite} +import org.apache.spark.deploy.security.KafkaTokenUtil +import org.apache.spark.deploy.security.KafkaTokenUtil.KafkaDelegationTokenIdentifier + +/** + * This is a trait which provides functionalities for Kafka delegation token related test suites. 
+ */ +trait KafkaDelegationTokenTest extends BeforeAndAfterEach { + self: SparkFunSuite => + + protected val tokenId = "tokenId" + ju.UUID.randomUUID().toString + protected val tokenPassword = "tokenPassword" + ju.UUID.randomUUID().toString + + private class KafkaJaasConfiguration extends Configuration { + val entry = + new AppConfigurationEntry( + "DummyModule", + AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, + ju.Collections.emptyMap[String, Object]() + ) + + override def getAppConfigurationEntry(name: String): Array[AppConfigurationEntry] = { + if (name.equals("KafkaClient")) { + Array(entry) + } else { + null + } + } + } + + override def afterEach(): Unit = { + try { + Configuration.setConfiguration(null) + UserGroupInformation.setLoginUser(null) + SparkEnv.set(null) + } finally { + super.afterEach() + } + } + + protected def setGlobalKafkaClientConfig(): Unit = { + Configuration.setConfiguration(new KafkaJaasConfiguration) + } + + protected def addTokenToUGI(): Unit = { + val token = new Token[KafkaDelegationTokenIdentifier]( + tokenId.getBytes, + tokenPassword.getBytes, + KafkaTokenUtil.TOKEN_KIND, + KafkaTokenUtil.TOKEN_SERVICE + ) + val creds = new Credentials() + creds.addToken(KafkaTokenUtil.TOKEN_SERVICE, token) + UserGroupInformation.getCurrentUser.addCredentials(creds) + } + + protected def setSparkEnv(settings: Traversable[(String, String)]): Unit = { + val conf = new SparkConf().setAll(settings) + val env = mock(classOf[SparkEnv]) + doReturn(conf).when(env).conf + SparkEnv.set(env) + } +} diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala index fd9dee390d18..d908bbfc2c5f 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSecurityHelperSuite.scala @@ -17,51 +17,9 @@ package org.apache.spark.sql.kafka010 -import java.util.UUID - -import org.apache.hadoop.security.{Credentials, UserGroupInformation} -import org.apache.hadoop.security.token.Token -import org.scalatest.BeforeAndAfterEach - import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.security.KafkaTokenUtil -import org.apache.spark.deploy.security.KafkaTokenUtil.KafkaDelegationTokenIdentifier - -class KafkaSecurityHelperSuite extends SparkFunSuite with BeforeAndAfterEach { - private val tokenId = "tokenId" + UUID.randomUUID().toString - private val tokenPassword = "tokenPassword" + UUID.randomUUID().toString - - private var sparkConf: SparkConf = null - - override def beforeEach(): Unit = { - super.beforeEach() - sparkConf = new SparkConf() - } - - override def afterEach(): Unit = { - try { - resetUGI - } finally { - super.afterEach() - } - } - - private def addTokenToUGI(): Unit = { - val token = new Token[KafkaDelegationTokenIdentifier]( - tokenId.getBytes, - tokenPassword.getBytes, - KafkaTokenUtil.TOKEN_KIND, - KafkaTokenUtil.TOKEN_SERVICE - ) - val creds = new Credentials() - creds.addToken(KafkaTokenUtil.TOKEN_SERVICE, token) - UserGroupInformation.getCurrentUser.addCredentials(creds) - } - - private def resetUGI: Unit = { - UserGroupInformation.setLoginUser(null) - } +class KafkaSecurityHelperSuite extends SparkFunSuite with KafkaDelegationTokenTest { test("isTokenAvailable without token should return false") { assert(!KafkaSecurityHelper.isTokenAvailable()) } @@ -75,7 +33,7 
@@ class KafkaSecurityHelperSuite extends SparkFunSuite with BeforeAndAfterEach { test("getTokenJaasParams with token should return scram module") { addTokenToUGI() - val jaasParams = KafkaSecurityHelper.getTokenJaasParams(sparkConf) + val jaasParams = KafkaSecurityHelper.getTokenJaasParams(new SparkConf()) assert(jaasParams.contains("ScramLoginModule required")) assert(jaasParams.contains("tokenauth=true")) From 81d377d772a527d9ae3311be0480e6403769e919 Mon Sep 17 00:00:00 2001 From: Vaclav Kosar Date: Mon, 17 Dec 2018 11:50:24 -0800 Subject: [PATCH 0154/1072] [SPARK-24933][SS] Report numOutputRows in SinkProgress ## What changes were proposed in this pull request? SinkProgress should report properties similar to SourceProgress, as long as they are available for a given Sink. The count of written rows is a metric available for all Sinks. Since the relevant progress information is with respect to committed rows, the ideal object to carry this info is WriterCommitMessage. For brevity, the implementation focuses only on Sinks with API V2 and on Micro Batch mode. An implementation for Continuous mode will be provided at a later date. ### Before ``` {"description":"org.apache.spark.sql.kafka010.KafkaSourceProvider3c0bd317"} ``` ### After ``` {"description":"org.apache.spark.sql.kafka010.KafkaSourceProvider3c0bd317","numOutputRows":5000} ``` ### This PR is related to: - https://issues.apache.org/jira/browse/SPARK-24647 - https://issues.apache.org/jira/browse/SPARK-21313 ## How was this patch tested? Existing and new unit tests. Closes #21919 from vackosar/feature/SPARK-24933-numOutputRows. Lead-authored-by: Vaclav Kosar Co-authored-by: Kosar, Vaclav: Functions Transformation Signed-off-by: gatorsmile --- .../spark/sql/kafka010/KafkaSinkSuite.scala | 21 +++++++++++++ .../v2/WriteToDataSourceV2Exec.scala | 30 +++++++++++++++---- .../streaming/MicroBatchExecution.scala | 11 +++++-- .../streaming/ProgressReporter.scala | 7 +++-- .../execution/streaming/StreamExecution.scala | 4 +++ .../apache/spark/sql/streaming/progress.scala | 21 +++++++++++-- ...StreamingQueryStatusAndProgressSuite.scala | 10 ++++--- 7 files changed, 88 insertions(+), 16 deletions(-) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSinkSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSinkSuite.scala index d46c4139011d..07d2b8a5dc42 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSinkSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSinkSuite.scala @@ -232,6 +232,27 @@ class KafkaSinkSuite extends StreamTest with SharedSQLContext with KafkaTest { } } + test("streaming - sink progress is produced") { + /* ensure sink progress is correctly produced.
*/ + val input = MemoryStream[String] + val topic = newTopic() + testUtils.createTopic(topic) + + val writer = createKafkaWriter( + input.toDF(), + withTopic = Some(topic), + withOutputMode = Some(OutputMode.Update()))() + + try { + input.addData("1", "2", "3") + failAfter(streamingTimeout) { + writer.processAllAvailable() + } + assert(writer.lastProgress.sink.numOutputRows == 3L) + } finally { + writer.stop() + } + } test("streaming - write data with bad schema") { val input = MemoryStream[String] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 9a1fe1e0a328..d7e20eed4cbc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import org.apache.spark.sql.sources.v2.writer._ -import org.apache.spark.util.Utils +import org.apache.spark.util.{LongAccumulator, Utils} /** * Deprecated logical plan for writing data into data source v2. This is being replaced by more @@ -47,6 +47,8 @@ case class WriteToDataSourceV2(writeSupport: BatchWriteSupport, query: LogicalPl case class WriteToDataSourceV2Exec(writeSupport: BatchWriteSupport, query: SparkPlan) extends UnaryExecNode { + var commitProgress: Option[StreamWriterCommitProgress] = None + override def child: SparkPlan = query override def output: Seq[Attribute] = Nil @@ -55,6 +57,7 @@ case class WriteToDataSourceV2Exec(writeSupport: BatchWriteSupport, query: Spark val useCommitCoordinator = writeSupport.useCommitCoordinator val rdd = query.execute() val messages = new Array[WriterCommitMessage](rdd.partitions.length) + val totalNumRowsAccumulator = new LongAccumulator() logInfo(s"Start processing data source write support: $writeSupport. 
" + s"The input RDD has ${messages.length} partitions.") @@ -65,15 +68,18 @@ case class WriteToDataSourceV2Exec(writeSupport: BatchWriteSupport, query: Spark (context: TaskContext, iter: Iterator[InternalRow]) => DataWritingSparkTask.run(writerFactory, context, iter, useCommitCoordinator), rdd.partitions.indices, - (index, message: WriterCommitMessage) => { - messages(index) = message - writeSupport.onDataWriterCommit(message) + (index, result: DataWritingSparkTaskResult) => { + val commitMessage = result.writerCommitMessage + messages(index) = commitMessage + totalNumRowsAccumulator.add(result.numRows) + writeSupport.onDataWriterCommit(commitMessage) } ) logInfo(s"Data source write support $writeSupport is committing.") writeSupport.commit(messages) logInfo(s"Data source write support $writeSupport committed.") + commitProgress = Some(StreamWriterCommitProgress(totalNumRowsAccumulator.value)) } catch { case cause: Throwable => logError(s"Data source write support $writeSupport is aborting.") @@ -102,7 +108,7 @@ object DataWritingSparkTask extends Logging { writerFactory: DataWriterFactory, context: TaskContext, iter: Iterator[InternalRow], - useCommitCoordinator: Boolean): WriterCommitMessage = { + useCommitCoordinator: Boolean): DataWritingSparkTaskResult = { val stageId = context.stageId() val stageAttempt = context.stageAttemptNumber() val partId = context.partitionId() @@ -110,9 +116,12 @@ object DataWritingSparkTask extends Logging { val attemptId = context.attemptNumber() val dataWriter = writerFactory.createWriter(partId, taskId) + var count = 0L // write the data and commit this writer. Utils.tryWithSafeFinallyAndFailureCallbacks(block = { while (iter.hasNext) { + // Count is here. + count += 1 dataWriter.write(iter.next()) } @@ -139,7 +148,7 @@ object DataWritingSparkTask extends Logging { logInfo(s"Committed partition $partId (task $taskId, attempt $attemptId" + s"stage $stageId.$stageAttempt)") - msg + DataWritingSparkTaskResult(count, msg) })(catchBlock = { // If there is an error, abort this writer @@ -151,3 +160,12 @@ object DataWritingSparkTask extends Logging { }) } } + +private[v2] case class DataWritingSparkTaskResult( + numRows: Long, + writerCommitMessage: WriterCommitMessage) + +/** + * Sink progress information collected after commit. 
+ */ +private[sql] case class StreamWriterCommitProgress(numOutputRows: Long) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 03beefeca269..8ad436a4ff57 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentBatchTimestamp, import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project} import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.SQLExecution -import org.apache.spark.sql.execution.datasources.v2.{StreamingDataSourceV2Relation, WriteToDataSourceV2} +import org.apache.spark.sql.execution.datasources.v2.{StreamingDataSourceV2Relation, StreamWriterCommitProgress, WriteToDataSourceV2, WriteToDataSourceV2Exec} import org.apache.spark.sql.execution.streaming.sources.{MicroBatchWritSupport, RateControlMicroBatchReadSupport} import org.apache.spark.sql.sources.v2._ import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchReadSupport, Offset => OffsetV2} @@ -246,6 +246,7 @@ class MicroBatchExecution( * DONE */ private def populateStartOffsets(sparkSessionToRunBatches: SparkSession): Unit = { + sinkCommitProgress = None offsetLog.getLatest() match { case Some((latestBatchId, nextOffsets)) => /* First assume that we are re-executing the latest known batch @@ -537,7 +538,8 @@ class MicroBatchExecution( val nextBatch = new Dataset(sparkSessionToRunBatch, lastExecution, RowEncoder(lastExecution.analyzed.schema)) - reportTimeTaken("addBatch") { + val batchSinkProgress: Option[StreamWriterCommitProgress] = + reportTimeTaken("addBatch") { SQLExecution.withNewExecutionId(sparkSessionToRunBatch, lastExecution) { sink match { case s: Sink => s.addBatch(currentBatchId, nextBatch) @@ -545,10 +547,15 @@ class MicroBatchExecution( // This doesn't accumulate any data - it just forces execution of the microbatch writer. 
nextBatch.collect() } + lastExecution.executedPlan match { + case w: WriteToDataSourceV2Exec => w.commitProgress + case _ => None + } } } withProgressLocked { + sinkCommitProgress = batchSinkProgress watermarkTracker.updateWatermark(lastExecution.executedPlan) commitLog.add(currentBatchId, CommitMetadata(watermarkTracker.currentWatermark)) committedOffsets ++= availableOffsets diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index 39ab702ee083..d1f3f74c5e73 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, LogicalPlan} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.QueryExecution -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2StreamingScanExec +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2StreamingScanExec, StreamWriterCommitProgress} import org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReadSupport import org.apache.spark.sql.streaming._ import org.apache.spark.sql.streaming.StreamingQueryListener.QueryProgressEvent @@ -56,6 +56,7 @@ trait ProgressReporter extends Logging { protected def logicalPlan: LogicalPlan protected def lastExecution: QueryExecution protected def newData: Map[BaseStreamingSource, LogicalPlan] + protected def sinkCommitProgress: Option[StreamWriterCommitProgress] protected def sources: Seq[BaseStreamingSource] protected def sink: BaseStreamingSink protected def offsetSeqMetadata: OffsetSeqMetadata @@ -167,7 +168,9 @@ trait ProgressReporter extends Logging { ) } - val sinkProgress = new SinkProgress(sink.toString) + val sinkProgress = SinkProgress( + sink.toString, + sinkCommitProgress.map(_.numOutputRows)) val newProgress = new StreamingQueryProgress( id = id, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 89b4f40c9c0b..83824f40ab90 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.command.StreamingExplainCommand +import org.apache.spark.sql.execution.datasources.v2.StreamWriterCommitProgress import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming._ import org.apache.spark.util.{Clock, UninterruptibleThread, Utils} @@ -114,6 +115,9 @@ abstract class StreamExecution( @volatile var availableOffsets = new StreamProgress + @volatile + var sinkCommitProgress: Option[StreamWriterCommitProgress] = None + /** The current batchId or -1 if execution has not yet been initialized. 
*/ protected var currentBatchId: Long = -1 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala index 3cd6700efef5..0b3945cbd132 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -30,6 +30,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.annotation.Evolving +import org.apache.spark.sql.streaming.SinkProgress.DEFAULT_NUM_OUTPUT_ROWS /** * Information about updates made to stateful operators in a [[StreamingQuery]] during a trigger. @@ -207,11 +208,19 @@ class SourceProgress protected[sql]( * during a trigger. See [[StreamingQueryProgress]] for more information. * * @param description Description of the source corresponding to this status. + * @param numOutputRows Number of rows written to the sink or -1 for Continuous Mode (temporarily) + * or Sink V1 (until decommissioned). * @since 2.1.0 */ @Evolving class SinkProgress protected[sql]( - val description: String) extends Serializable { + val description: String, + val numOutputRows: Long) extends Serializable { + + /** SinkProgress without custom metrics. */ + protected[sql] def this(description: String) { + this(description, DEFAULT_NUM_OUTPUT_ROWS) + } /** The compact JSON representation of this progress. */ def json: String = compact(render(jsonValue)) @@ -222,6 +231,14 @@ class SinkProgress protected[sql]( override def toString: String = prettyJson private[sql] def jsonValue: JValue = { - ("description" -> JString(description)) + ("description" -> JString(description)) ~ + ("numOutputRows" -> JInt(numOutputRows)) } } + +private[sql] object SinkProgress { + val DEFAULT_NUM_OUTPUT_ROWS: Long = -1L + + def apply(description: String, numOutputRows: Option[Long]): SinkProgress = + new SinkProgress(description, numOutputRows.getOrElse(DEFAULT_NUM_OUTPUT_ROWS)) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala index 7bef687e7e43..2f460b044b23 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala @@ -73,7 +73,8 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { | "inputRowsPerSecond" : 10.0 | } ], | "sink" : { - | "description" : "sink" + | "description" : "sink", + | "numOutputRows" : -1 | } |} """.stripMargin.trim) @@ -105,7 +106,8 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { | "numInputRows" : 678 | } ], | "sink" : { - | "description" : "sink" + | "description" : "sink", + | "numOutputRows" : -1 | } |} """.stripMargin.trim) @@ -250,7 +252,7 @@ object StreamingQueryStatusAndProgressSuite { processedRowsPerSecond = Double.PositiveInfinity // should not be present in the json ) ), - sink = new SinkProgress("sink") + sink = SinkProgress("sink", None) ) val testProgress2 = new StreamingQueryProgress( @@ -274,7 +276,7 @@ object StreamingQueryStatusAndProgressSuite { processedRowsPerSecond = Double.NegativeInfinity // should not be present in the json ) ), - sink = new SinkProgress("sink") + sink = SinkProgress("sink", None) ) val testStatus = new StreamingQueryStatus("active", true, false) From 
114d0de14c441f06d98ab1bcf6c8375c58ecd9ab Mon Sep 17 00:00:00 2001 From: suxingfate Date: Mon, 17 Dec 2018 13:36:57 -0800 Subject: [PATCH 0155/1072] [SPARK-25922][K8] Spark Driver/Executor "spark-app-selector" label mismatch ## What changes were proposed in this pull request? In K8S Cluster mode, the algorithm used to generate the spark-app-selector/spark.app.id of the spark driver is different from the one used for the spark executor. This patch makes sure the spark driver and executor use the same spark-app-selector/spark.app.id if spark.app.id is set; otherwise the superclass applicationId is used. In K8S Client mode, the spark-app-selector/spark.app.id for executors will use the superclass applicationId. ## How was this patch tested? Manually run. Closes #23322 from suxingfate/SPARK-25922. Lead-authored-by: suxingfate Co-authored-by: xinglwang Signed-off-by: Yinan Li --- .../KubernetesClusterSchedulerBackend.scala | 28 ++++++++++++++----- ...bernetesClusterSchedulerBackendSuite.scala | 14 +++++----- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index 68f6f2e46e31..03f5da2bb0bc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -18,9 +18,10 @@ package org.apache.spark.scheduler.cluster.k8s import java.util.concurrent.ExecutorService -import io.fabric8.kubernetes.client.KubernetesClient import scala.concurrent.{ExecutionContext, Future} +import io.fabric8.kubernetes.client.KubernetesClient + import org.apache.spark.SparkContext import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ @@ -39,10 +40,10 @@ private[spark] class KubernetesClusterSchedulerBackend( lifecycleEventHandler: ExecutorPodsLifecycleManager, watchEvents: ExecutorPodsWatchSnapshotSource, pollEvents: ExecutorPodsPollingSnapshotSource) - extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { + extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { - private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( - requestExecutorsService) + private implicit val requestExecutorContext = + ExecutionContext.fromExecutorService(requestExecutorsService) protected override val minRegisteredRatio = if (conf.getOption("spark.scheduler.minRegisteredResourcesRatio").isEmpty) { @@ -60,6 +61,17 @@ private[spark] class KubernetesClusterSchedulerBackend( removeExecutor(executorId, reason) } + /** + * Get an application ID associated with the job. + * This returns the string value of spark.app.id if set, otherwise + * the locally-generated ID from the superclass.
+ * + * @return The application ID + */ + override def applicationId(): String = { + conf.getOption("spark.app.id").map(_.toString).getOrElse(super.applicationId) + } + override def start(): Unit = { super.start() if (!Utils.isDynamicAllocationEnabled(conf)) { @@ -88,7 +100,8 @@ private[spark] class KubernetesClusterSchedulerBackend( if (shouldDeleteExecutors) { Utils.tryLogNonFatalError { - kubernetesClient.pods() + kubernetesClient + .pods() .withLabel(SPARK_APP_ID_LABEL, applicationId()) .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE) .delete() @@ -120,7 +133,8 @@ private[spark] class KubernetesClusterSchedulerBackend( } override def doKillExecutors(executorIds: Seq[String]): Future[Boolean] = Future[Boolean] { - kubernetesClient.pods() + kubernetesClient + .pods() .withLabel(SPARK_APP_ID_LABEL, applicationId()) .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE) .withLabelIn(SPARK_EXECUTOR_ID_LABEL, executorIds: _*) @@ -133,7 +147,7 @@ private[spark] class KubernetesClusterSchedulerBackend( } private class KubernetesDriverEndpoint(rpcEnv: RpcEnv, sparkProperties: Seq[(String, String)]) - extends DriverEndpoint(rpcEnv, sparkProperties) { + extends DriverEndpoint(rpcEnv, sparkProperties) { override def onDisconnected(rpcAddress: RpcAddress): Unit = { // Don't do anything besides disabling the executor - allow the Kubernetes API events to diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala index 75232f7b98b0..6e182bed459f 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala @@ -37,6 +37,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn private val requestExecutorsService = new DeterministicScheduler() private val sparkConf = new SparkConf(false) .set("spark.executor.instances", "3") + .set("spark.app.id", TEST_SPARK_APP_ID) @Mock private var sc: SparkContext = _ @@ -87,8 +88,10 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn when(sc.env).thenReturn(env) when(env.rpcEnv).thenReturn(rpcEnv) driverEndpoint = ArgumentCaptor.forClass(classOf[RpcEndpoint]) - when(rpcEnv.setupEndpoint( - mockitoEq(CoarseGrainedSchedulerBackend.ENDPOINT_NAME), driverEndpoint.capture())) + when( + rpcEnv.setupEndpoint( + mockitoEq(CoarseGrainedSchedulerBackend.ENDPOINT_NAME), + driverEndpoint.capture())) .thenReturn(driverEndpointRef) when(kubernetesClient.pods()).thenReturn(podOperations) schedulerBackendUnderTest = new KubernetesClusterSchedulerBackend( @@ -100,9 +103,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn podAllocator, lifecycleEventHandler, watchEvents, - pollEvents) { - override def applicationId(): String = TEST_SPARK_APP_ID - } + pollEvents) } test("Start all components") { @@ -127,8 +128,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn test("Remove executor") { schedulerBackendUnderTest.start() - schedulerBackendUnderTest.doRemoveExecutor( - "1", ExecutorKilled) + schedulerBackendUnderTest.doRemoveExecutor("1", ExecutorKilled) verify(driverEndpointRef).send(RemoveExecutor("1", ExecutorKilled)) 
} From 86100df54ba8413bebd6ca243b55a6007bc7a2de Mon Sep 17 00:00:00 2001 From: Li Jin Date: Tue, 18 Dec 2018 09:15:21 +0800 Subject: [PATCH 0156/1072] [SPARK-24561][SQL][PYTHON] User-defined window aggregation functions with Pandas UDF (bounded window) ## What changes were proposed in this pull request? This PR implements a new feature - window aggregation Pandas UDF for bounded window. #### Doc: https://docs.google.com/document/d/14EjeY5z4-NC27-SmIP9CsMPCANeTcvxN44a7SIJtZPc/edit#heading=h.c87w44wcj3wj #### Example: ``` from pyspark.sql.functions import pandas_udf, PandasUDFType from pyspark.sql.window import Window df = spark.range(0, 10, 2).toDF('v') w1 = Window.partitionBy().orderBy('v').rangeBetween(-2, 4) w2 = Window.partitionBy().orderBy('v').rowsBetween(-2, 2) pandas_udf('double', PandasUDFType.GROUPED_AGG) def avg(v): return v.mean() df.withColumn('v_mean', avg(df['v']).over(w1)).show() # +---+------+ # | v|v_mean| # +---+------+ # | 0| 1.0| # | 2| 2.0| # | 4| 4.0| # | 6| 6.0| # | 8| 7.0| # +---+------+ df.withColumn('v_mean', avg(df['v']).over(w2)).show() # +---+------+ # | v|v_mean| # +---+------+ # | 0| 2.0| # | 2| 3.0| # | 4| 4.0| # | 6| 5.0| # | 8| 6.0| # +---+------+ ``` #### High level changes: This PR modifies the existing WindowInPandasExec physical node to deal with unbounded (growing, shrinking and sliding) windows. * `WindowInPandasExec` now share the same base class as `WindowExec` and share utility functions. See `WindowExecBase` * `WindowFunctionFrame` now has two new functions `currentLowerBound` and `currentUpperBound` - to return the lower and upper window bound for the current output row. It is also modified to allow `AggregateProcessor` == null. Null aggregator processor is used for `WindowInPandasExec` where we don't have an aggregator and only uses lower and upper bound functions from `WindowFunctionFrame` * The biggest change is in `WindowInPandasExec`, where it is modified to take `currentLowerBound` and `currentUpperBound` and write those values together with the input data to the python process for rolling window aggregation. See `WindowInPandasExec` for more details. #### Discussion In benchmarking, I found numpy variant of the rolling window UDF is much faster than the pandas version: Spark SQL window function: 20s Pandas variant: ~80s Numpy variant: 10s Numpy variant with numba: 4s Allowing numpy variant of the vectorized UDFs is something I want to discuss because of the performance improvement, but doesn't have to be in this PR. ## How was this patch tested? New tests Closes #22305 from icexelloss/SPARK-24561-bounded-window-udf. 
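Before the code changes, here is a small, illustrative Scala sketch (not Spark code; the object and function names are invented) of the per-row (lower, upper) bound pairs that are computed on the JVM side and prepended to the data sent to the Python worker for a bounded row frame such as rowsBetween(-2, 2); the real logic lives in WindowFunctionFrame's currentLowerBound/currentUpperBound and in WindowInPandasExec below:

```scala
// Illustration only: for each row i in a sorted partition of `partitionSize` rows,
// a rowsBetween(lower, upper) frame covers indices [i + lower, i + upper], clamped to
// the partition. The upper bound here is exclusive, matching how the Python worker
// slices the input series (series.iloc[lower:upper]).
object RowFrameBoundsSketch {
  def rowFrameBounds(partitionSize: Int, lower: Int, upper: Int): Seq[(Int, Int)] =
    (0 until partitionSize).map { i =>
      (math.max(0, i + lower), math.min(partitionSize, i + upper + 1))
    }

  def main(args: Array[String]): Unit = {
    // For 5 rows and rowsBetween(-2, 2): (0,3), (0,4), (0,5), (1,5), (2,5)
    rowFrameBounds(5, -2, 2).foreach(println)
  }
}
```

Unbounded frames skip these indices entirely, which is why the worker evaluates an unbounded-window UDF once per partition but a bounded-window UDF once per input row.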
Authored-by: Li Jin Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/functions.py | 21 +- .../sql/tests/test_pandas_udf_window.py | 157 ++++++++- python/pyspark/worker.py | 57 ++- .../sql/catalyst/analysis/CheckAnalysis.scala | 5 - .../execution/python/WindowInPandasExec.scala | 329 +++++++++++++++--- .../sql/execution/window/WindowExec.scala | 189 +--------- .../sql/execution/window/WindowExecBase.scala | 230 ++++++++++++ .../window/WindowFunctionFrame.scala | 108 ++++-- 8 files changed, 792 insertions(+), 304 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index f98e550e39da..d188de39e21c 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2982,8 +2982,7 @@ def pandas_udf(f=None, returnType=None, functionType=None): | 2| 6.0| +---+-----------+ - This example shows using grouped aggregated UDFs as window functions. Note that only - unbounded window frame is supported at the moment: + This example shows using grouped aggregated UDFs as window functions. >>> from pyspark.sql.functions import pandas_udf, PandasUDFType >>> from pyspark.sql import Window @@ -2993,20 +2992,24 @@ def pandas_udf(f=None, returnType=None, functionType=None): >>> @pandas_udf("double", PandasUDFType.GROUPED_AGG) # doctest: +SKIP ... def mean_udf(v): ... return v.mean() - >>> w = Window \\ - ... .partitionBy('id') \\ - ... .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing) + >>> w = (Window.partitionBy('id') + ... .orderBy('v') + ... .rowsBetween(-1, 0)) >>> df.withColumn('mean_v', mean_udf(df['v']).over(w)).show() # doctest: +SKIP +---+----+------+ | id| v|mean_v| +---+----+------+ - | 1| 1.0| 1.5| + | 1| 1.0| 1.0| | 1| 2.0| 1.5| - | 2| 3.0| 6.0| - | 2| 5.0| 6.0| - | 2|10.0| 6.0| + | 2| 3.0| 3.0| + | 2| 5.0| 4.0| + | 2|10.0| 7.5| +---+----+------+ + .. note:: For performance reasons, the input series to window functions are not copied. + Therefore, mutating the input series is not allowed and will cause incorrect results. + For the same reason, users should also not rely on the index of the input series. + .. seealso:: :meth:`pyspark.sql.GroupedData.agg` and :class:`pyspark.sql.Window` .. note:: The user-defined functions are considered deterministic by default. 
Due to diff --git a/python/pyspark/sql/tests/test_pandas_udf_window.py b/python/pyspark/sql/tests/test_pandas_udf_window.py index 0a7a19c1c081..3ba98e76468b 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_window.py +++ b/python/pyspark/sql/tests/test_pandas_udf_window.py @@ -46,6 +46,15 @@ def python_plus_one(self): def pandas_scalar_time_two(self): return pandas_udf(lambda v: v * 2, 'double') + @property + def pandas_agg_count_udf(self): + from pyspark.sql.functions import pandas_udf, PandasUDFType + + @pandas_udf('long', PandasUDFType.GROUPED_AGG) + def count(v): + return len(v) + return count + @property def pandas_agg_mean_udf(self): @pandas_udf('double', PandasUDFType.GROUPED_AGG) @@ -70,7 +79,7 @@ def min(v): @property def unbounded_window(self): return Window.partitionBy('id') \ - .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing) + .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing).orderBy('v') @property def ordered_window(self): @@ -80,6 +89,32 @@ def ordered_window(self): def unpartitioned_window(self): return Window.partitionBy() + @property + def sliding_row_window(self): + return Window.partitionBy('id').orderBy('v').rowsBetween(-2, 1) + + @property + def sliding_range_window(self): + return Window.partitionBy('id').orderBy('v').rangeBetween(-2, 4) + + @property + def growing_row_window(self): + return Window.partitionBy('id').orderBy('v').rowsBetween(Window.unboundedPreceding, 3) + + @property + def growing_range_window(self): + return Window.partitionBy('id').orderBy('v') \ + .rangeBetween(Window.unboundedPreceding, 4) + + @property + def shrinking_row_window(self): + return Window.partitionBy('id').orderBy('v').rowsBetween(-2, Window.unboundedFollowing) + + @property + def shrinking_range_window(self): + return Window.partitionBy('id').orderBy('v') \ + .rangeBetween(-3, Window.unboundedFollowing) + def test_simple(self): df = self.data w = self.unbounded_window @@ -100,12 +135,12 @@ def test_multiple_udfs(self): w = self.unbounded_window result1 = df.withColumn('mean_v', self.pandas_agg_mean_udf(df['v']).over(w)) \ - .withColumn('max_v', self.pandas_agg_max_udf(df['v']).over(w)) \ - .withColumn('min_w', self.pandas_agg_min_udf(df['w']).over(w)) + .withColumn('max_v', self.pandas_agg_max_udf(df['v']).over(w)) \ + .withColumn('min_w', self.pandas_agg_min_udf(df['w']).over(w)) expected1 = df.withColumn('mean_v', mean(df['v']).over(w)) \ - .withColumn('max_v', max(df['v']).over(w)) \ - .withColumn('min_w', min(df['w']).over(w)) + .withColumn('max_v', max(df['v']).over(w)) \ + .withColumn('min_w', min(df['w']).over(w)) self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) @@ -183,16 +218,16 @@ def test_mixed_sql_and_udf(self): # Test chaining sql aggregate function and udf result3 = df.withColumn('max_v', max_udf(df['v']).over(w)) \ - .withColumn('min_v', min(df['v']).over(w)) \ - .withColumn('v_diff', col('max_v') - col('min_v')) \ - .drop('max_v', 'min_v') + .withColumn('min_v', min(df['v']).over(w)) \ + .withColumn('v_diff', col('max_v') - col('min_v')) \ + .drop('max_v', 'min_v') expected3 = expected1 # Test mixing sql window function and udf result4 = df.withColumn('max_v', max_udf(df['v']).over(w)) \ - .withColumn('rank', rank().over(ow)) + .withColumn('rank', rank().over(ow)) expected4 = df.withColumn('max_v', max(df['v']).over(w)) \ - .withColumn('rank', rank().over(ow)) + .withColumn('rank', rank().over(ow)) self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) self.assertPandasEqual(expected2.toPandas(), 
result2.toPandas()) @@ -210,8 +245,6 @@ def test_array_type(self): def test_invalid_args(self): df = self.data w = self.unbounded_window - ow = self.ordered_window - mean_udf = self.pandas_agg_mean_udf with QuietTest(self.sc): with self.assertRaisesRegexp( @@ -220,11 +253,101 @@ def test_invalid_args(self): foo_udf = pandas_udf(lambda x: x, 'v double', PandasUDFType.GROUPED_MAP) df.withColumn('v2', foo_udf(df['v']).over(w)) - with QuietTest(self.sc): - with self.assertRaisesRegexp( - AnalysisException, - '.*Only unbounded window frame is supported.*'): - df.withColumn('mean_v', mean_udf(df['v']).over(ow)) + def test_bounded_simple(self): + from pyspark.sql.functions import mean, max, min, count + + df = self.data + w1 = self.sliding_row_window + w2 = self.shrinking_range_window + + plus_one = self.python_plus_one + count_udf = self.pandas_agg_count_udf + mean_udf = self.pandas_agg_mean_udf + max_udf = self.pandas_agg_max_udf + min_udf = self.pandas_agg_min_udf + + result1 = df.withColumn('mean_v', mean_udf(plus_one(df['v'])).over(w1)) \ + .withColumn('count_v', count_udf(df['v']).over(w2)) \ + .withColumn('max_v', max_udf(df['v']).over(w2)) \ + .withColumn('min_v', min_udf(df['v']).over(w1)) + + expected1 = df.withColumn('mean_v', mean(plus_one(df['v'])).over(w1)) \ + .withColumn('count_v', count(df['v']).over(w2)) \ + .withColumn('max_v', max(df['v']).over(w2)) \ + .withColumn('min_v', min(df['v']).over(w1)) + + self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) + + def test_growing_window(self): + from pyspark.sql.functions import mean + + df = self.data + w1 = self.growing_row_window + w2 = self.growing_range_window + + mean_udf = self.pandas_agg_mean_udf + + result1 = df.withColumn('m1', mean_udf(df['v']).over(w1)) \ + .withColumn('m2', mean_udf(df['v']).over(w2)) + + expected1 = df.withColumn('m1', mean(df['v']).over(w1)) \ + .withColumn('m2', mean(df['v']).over(w2)) + + self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) + + def test_sliding_window(self): + from pyspark.sql.functions import mean + + df = self.data + w1 = self.sliding_row_window + w2 = self.sliding_range_window + + mean_udf = self.pandas_agg_mean_udf + + result1 = df.withColumn('m1', mean_udf(df['v']).over(w1)) \ + .withColumn('m2', mean_udf(df['v']).over(w2)) + + expected1 = df.withColumn('m1', mean(df['v']).over(w1)) \ + .withColumn('m2', mean(df['v']).over(w2)) + + self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) + + def test_shrinking_window(self): + from pyspark.sql.functions import mean + + df = self.data + w1 = self.shrinking_row_window + w2 = self.shrinking_range_window + + mean_udf = self.pandas_agg_mean_udf + + result1 = df.withColumn('m1', mean_udf(df['v']).over(w1)) \ + .withColumn('m2', mean_udf(df['v']).over(w2)) + + expected1 = df.withColumn('m1', mean(df['v']).over(w1)) \ + .withColumn('m2', mean(df['v']).over(w2)) + + self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) + + def test_bounded_mixed(self): + from pyspark.sql.functions import mean, max + + df = self.data + w1 = self.sliding_row_window + w2 = self.unbounded_window + + mean_udf = self.pandas_agg_mean_udf + max_udf = self.pandas_agg_max_udf + + result1 = df.withColumn('mean_v', mean_udf(df['v']).over(w1)) \ + .withColumn('max_v', max_udf(df['v']).over(w2)) \ + .withColumn('mean_unbounded_v', mean_udf(df['v']).over(w1)) + + expected1 = df.withColumn('mean_v', mean(df['v']).over(w1)) \ + .withColumn('max_v', max(df['v']).over(w2)) \ + .withColumn('mean_unbounded_v', mean(df['v']).over(w1)) 
+ + self.assertPandasEqual(expected1.toPandas(), result1.toPandas()) if __name__ == "__main__": diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 953b468e9651..bf007b0c62d8 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -145,7 +145,18 @@ def wrapped(*series): return lambda *a: (wrapped(*a), arrow_return_type) -def wrap_window_agg_pandas_udf(f, return_type): +def wrap_window_agg_pandas_udf(f, return_type, runner_conf, udf_index): + window_bound_types_str = runner_conf.get('pandas_window_bound_types') + window_bound_type = [t.strip().lower() for t in window_bound_types_str.split(',')][udf_index] + if window_bound_type == 'bounded': + return wrap_bounded_window_agg_pandas_udf(f, return_type) + elif window_bound_type == 'unbounded': + return wrap_unbounded_window_agg_pandas_udf(f, return_type) + else: + raise RuntimeError("Invalid window bound type: {} ".format(window_bound_type)) + + +def wrap_unbounded_window_agg_pandas_udf(f, return_type): # This is similar to grouped_agg_pandas_udf, the only difference # is that window_agg_pandas_udf needs to repeat the return value # to match window length, where grouped_agg_pandas_udf just returns @@ -160,7 +171,41 @@ def wrapped(*series): return lambda *a: (wrapped(*a), arrow_return_type) -def read_single_udf(pickleSer, infile, eval_type, runner_conf): +def wrap_bounded_window_agg_pandas_udf(f, return_type): + arrow_return_type = to_arrow_type(return_type) + + def wrapped(begin_index, end_index, *series): + import pandas as pd + result = [] + + # Index operation is faster on np.ndarray, + # So we turn the index series into np array + # here for performance + begin_array = begin_index.values + end_array = end_index.values + + for i in range(len(begin_array)): + # Note: Create a slice from a series for each window is + # actually pretty expensive. However, there + # is no easy way to reduce cost here. + # Note: s.iloc[i : j] is about 30% faster than s[i: j], with + # the caveat that the created slices shares the same + # memory with s. Therefore, user are not allowed to + # change the value of input series inside the window + # function. It is rare that user needs to modify the + # input series in the window function, and therefore, + # it is be a reasonable restriction. + # Note: Calling reset_index on the slices will increase the cost + # of creating slices by about 100%. Therefore, for performance + # reasons we don't do it here. 
+ series_slices = [s.iloc[begin_array[i]: end_array[i]] for s in series] + result.append(f(*series_slices)) + return pd.Series(result) + + return lambda *a: (wrapped(*a), arrow_return_type) + + +def read_single_udf(pickleSer, infile, eval_type, runner_conf, udf_index): num_arg = read_int(infile) arg_offsets = [read_int(infile) for i in range(num_arg)] row_func = None @@ -184,7 +229,7 @@ def read_single_udf(pickleSer, infile, eval_type, runner_conf): elif eval_type == PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF: return arg_offsets, wrap_grouped_agg_pandas_udf(func, return_type) elif eval_type == PythonEvalType.SQL_WINDOW_AGG_PANDAS_UDF: - return arg_offsets, wrap_window_agg_pandas_udf(func, return_type) + return arg_offsets, wrap_window_agg_pandas_udf(func, return_type, runner_conf, udf_index) elif eval_type == PythonEvalType.SQL_BATCHED_UDF: return arg_offsets, wrap_udf(func, return_type) else: @@ -226,7 +271,8 @@ def read_udfs(pickleSer, infile, eval_type): # See FlatMapGroupsInPandasExec for how arg_offsets are used to # distinguish between grouping attributes and data attributes - arg_offsets, udf = read_single_udf(pickleSer, infile, eval_type, runner_conf) + arg_offsets, udf = read_single_udf( + pickleSer, infile, eval_type, runner_conf, udf_index=0) udfs['f'] = udf split_offset = arg_offsets[0] + 1 arg0 = ["a[%d]" % o for o in arg_offsets[1: split_offset]] @@ -238,7 +284,8 @@ def read_udfs(pickleSer, infile, eval_type): # In the special case of a single UDF this will return a single result rather # than a tuple of results; this is the format that the JVM side expects. for i in range(num_udfs): - arg_offsets, udf = read_single_udf(pickleSer, infile, eval_type, runner_conf) + arg_offsets, udf = read_single_udf( + pickleSer, infile, eval_type, runner_conf, udf_index=i) udfs['f%d' % i] = udf args = ["a[%d]" % o for o in arg_offsets] call_udf.append("f%d(%s)" % (i, ", ".join(args))) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 6a91d556b2f3..88d41e882440 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -134,11 +134,6 @@ trait CheckAnalysis extends PredicateHelper { failAnalysis("An offset window function can only be evaluated in an ordered " + s"row-based window frame with a single offset: $w") - case _ @ WindowExpression(_: PythonUDF, - WindowSpecDefinition(_, _, frame: SpecifiedWindowFrame)) - if !frame.isUnbounded => - failAnalysis("Only unbounded window frame is supported with Pandas UDFs.") - case w @ WindowExpression(e, s) => // Only allow window functions with an aggregate expression or an offset window // function or a Pandas window UDF. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala index 82973307feef..1ce1215bfdd6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala @@ -27,17 +27,64 @@ import org.apache.spark.api.python.{ChainedPythonFunctions, PythonEvalType} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.physical._ -import org.apache.spark.sql.execution.{GroupedIterator, SparkPlan, UnaryExecNode} +import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning} +import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan} import org.apache.spark.sql.execution.arrow.ArrowUtils -import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.sql.execution.window._ +import org.apache.spark.sql.types._ import org.apache.spark.util.Utils +/** + * This class calculates and outputs windowed aggregates over the rows in a single partition. + * + * This is similar to [[WindowExec]]. The main difference is that this node does not compute + * any window aggregation values. Instead, it computes the lower and upper bound for each window + * (i.e. window bounds) and passes the data and indices to the Python worker to do the actual window + * aggregation. + * + * It currently materializes all data associated with the same partition key and passes them to + * the Python worker. This is not strictly necessary for sliding windows and can be improved (by + * possibly slicing data into overlapping chunks and stitching them together). + * + * This class groups window expressions by their window boundaries so that window expressions + * with the same window boundaries can share the same window bounds. The window bounds are + * prepended to the data passed to the Python worker. + * + * For example, if we have: + * avg(v) over specifiedwindowframe(RowFrame, -5, 5), + * avg(v) over specifiedwindowframe(RowFrame, UnboundedPreceding, UnboundedFollowing), + * avg(v) over specifiedwindowframe(RowFrame, -3, 3), + * max(v) over specifiedwindowframe(RowFrame, -3, 3) + * + * The Python input will look like: + * (lower_bound_w1, upper_bound_w1, lower_bound_w3, upper_bound_w3, v) + * + * where w1 is specifiedwindowframe(RowFrame, -5, 5) + * w2 is specifiedwindowframe(RowFrame, UnboundedPreceding, UnboundedFollowing) + * w3 is specifiedwindowframe(RowFrame, -3, 3) + * + * Note that w2 doesn't have bound indices in the Python input because it is an unbounded window, + * so its bound indices will always be the same. + * + * Bounded and unbounded windows are evaluated differently in the Python worker: + * (1) A bounded window takes the window bound indices in addition to the input columns. + * An unbounded window takes only the input columns. + * (2) A bounded window evaluates the UDF once per input row. + * An unbounded window evaluates the UDF once per window partition. + * This is controlled by the Python runner conf "pandas_window_bound_types". + * + * The logic to compute window bounds is delegated to [[WindowFunctionFrame]] and shared with + * [[WindowExec]]. + * + * Note that this doesn't support partial aggregation; all aggregation is computed from the entire + * window.
+ */ case class WindowInPandasExec( windowExpression: Seq[NamedExpression], partitionSpec: Seq[Expression], orderSpec: Seq[SortOrder], - child: SparkPlan) extends UnaryExecNode { + child: SparkPlan) + extends WindowExecBase(windowExpression, partitionSpec, orderSpec, child) { override def output: Seq[Attribute] = child.output ++ windowExpression.map(_.toAttribute) @@ -60,6 +107,26 @@ case class WindowInPandasExec( override def outputPartitioning: Partitioning = child.outputPartitioning + /** + * Helper functions and data structures for window bounds + * + * It contains: + * (1) Total number of window bound indices in the python input row + * (2) Function from frame index to its lower bound column index in the python input row + * (3) Function from frame index to its upper bound column index in the python input row + * (4) Seq from frame index to its window bound type + */ + private type WindowBoundHelpers = (Int, Int => Int, Int => Int, Seq[WindowBoundType]) + + /** + * Enum for window bound types. Used only inside this class. + */ + private sealed case class WindowBoundType(value: String) + private object UnboundedWindow extends WindowBoundType("unbounded") + private object BoundedWindow extends WindowBoundType("bounded") + + private val windowBoundTypeConf = "pandas_window_bound_types" + private def collectFunctions(udf: PythonUDF): (ChainedPythonFunctions, Seq[Expression]) = { udf.children match { case Seq(u: PythonUDF) => @@ -73,68 +140,150 @@ case class WindowInPandasExec( } /** - * Create the resulting projection. - * - * This method uses Code Generation. It can only be used on the executor side. - * - * @param expressions unbound ordered function expressions. - * @return the final resulting projection. + * See [[WindowBoundHelpers]] for details. 
*/ - private[this] def createResultProjection(expressions: Seq[Expression]): UnsafeProjection = { - val references = expressions.zipWithIndex.map { case (e, i) => - // Results of window expressions will be on the right side of child's output - BoundReference(child.output.size + i, e.dataType, e.nullable) + private def computeWindowBoundHelpers( + factories: Seq[InternalRow => WindowFunctionFrame] + ): WindowBoundHelpers = { + val functionFrames = factories.map(_(EmptyRow)) + + val windowBoundTypes = functionFrames.map { + case _: UnboundedWindowFunctionFrame => UnboundedWindow + case _: UnboundedFollowingWindowFunctionFrame | + _: SlidingWindowFunctionFrame | + _: UnboundedPrecedingWindowFunctionFrame => BoundedWindow + // It should be impossible to get other types of window function frame here + case frame => throw new RuntimeException(s"Unexpected window function frame $frame.") } - val unboundToRefMap = expressions.zip(references).toMap - val patchedWindowExpression = windowExpression.map(_.transform(unboundToRefMap)) - UnsafeProjection.create( - child.output ++ patchedWindowExpression, - child.output) + + val requiredIndices = functionFrames.map { + case _: UnboundedWindowFunctionFrame => 0 + case _ => 2 + } + + val upperBoundIndices = requiredIndices.scan(0)(_ + _).tail + + val boundIndices = requiredIndices.zip(upperBoundIndices).map { case (num, upperBoundIndex) => + if (num == 0) { + // Sentinel values for unbounded window + (-1, -1) + } else { + (upperBoundIndex - 2, upperBoundIndex - 1) + } + } + + def lowerBoundIndex(frameIndex: Int) = boundIndices(frameIndex)._1 + def upperBoundIndex(frameIndex: Int) = boundIndices(frameIndex)._2 + + (requiredIndices.sum, lowerBoundIndex, upperBoundIndex, windowBoundTypes) } protected override def doExecute(): RDD[InternalRow] = { - val inputRDD = child.execute() + // Unwrap the expressions and factories from the map. + val expressionsWithFrameIndex = + windowFrameExpressionFactoryPairs.map(_._1).zipWithIndex.flatMap { + case (buffer, frameIndex) => buffer.map(expr => (expr, frameIndex)) + } + + val expressions = expressionsWithFrameIndex.map(_._1) + val expressionIndexToFrameIndex = + expressionsWithFrameIndex.map(_._2).zipWithIndex.map(_.swap).toMap + + val factories = windowFrameExpressionFactoryPairs.map(_._2).toArray + // Helper functions + val (numBoundIndices, lowerBoundIndex, upperBoundIndex, frameWindowBoundTypes) = + computeWindowBoundHelpers(factories) + val isBounded = { frameIndex: Int => lowerBoundIndex(frameIndex) >= 0 } + val numFrames = factories.length + + val inMemoryThreshold = conf.windowExecBufferInMemoryThreshold + val spillThreshold = conf.windowExecBufferSpillThreshold val sessionLocalTimeZone = conf.sessionLocalTimeZone - val pythonRunnerConf = ArrowUtils.getPythonRunnerConfMap(conf) // Extract window expressions and window functions - val expressions = windowExpression.flatMap(_.collect { case e: WindowExpression => e }) - - val udfExpressions = expressions.map(_.windowFunction.asInstanceOf[PythonUDF]) + val windowExpressions = expressions.flatMap(_.collect { case e: WindowExpression => e }) + val udfExpressions = windowExpressions.map(_.windowFunction.asInstanceOf[PythonUDF]) + // We shouldn't be chaining anything here. + // All chained python functions should only contain one function. 
val (pyFuncs, inputs) = udfExpressions.map(collectFunctions).unzip + require(pyFuncs.length == expressions.length) + + val udfWindowBoundTypes = pyFuncs.indices.map(i => + frameWindowBoundTypes(expressionIndexToFrameIndex(i))) + val pythonRunnerConf: Map[String, String] = (ArrowUtils.getPythonRunnerConfMap(conf) + + (windowBoundTypeConf -> udfWindowBoundTypes.map(_.value).mkString(","))) // Filter child output attributes down to only those that are UDF inputs. - // Also eliminate duplicate UDF inputs. - val allInputs = new ArrayBuffer[Expression] - val dataTypes = new ArrayBuffer[DataType] + // Also eliminate duplicate UDF inputs. This is similar to how other Python UDF nodes + // handle UDF inputs. + val dataInputs = new ArrayBuffer[Expression] + val dataInputTypes = new ArrayBuffer[DataType] val argOffsets = inputs.map { input => input.map { e => - if (allInputs.exists(_.semanticEquals(e))) { - allInputs.indexWhere(_.semanticEquals(e)) + if (dataInputs.exists(_.semanticEquals(e))) { + dataInputs.indexWhere(_.semanticEquals(e)) } else { - allInputs += e - dataTypes += e.dataType - allInputs.length - 1 + dataInputs += e + dataInputTypes += e.dataType + dataInputs.length - 1 } }.toArray }.toArray - // Schema of input rows to the python runner - val windowInputSchema = StructType(dataTypes.zipWithIndex.map { case (dt, i) => - StructField(s"_$i", dt) - }) + // In addition to UDF inputs, we will prepend window bounds for each UDF. + // For bounded windows, we prepend lower bound and upper bound. For unbounded windows, + // we do not add window bounds. (Strictly speaking, we only need the lower or upper bound + // if the window is bounded on only one side; this can be improved in the future.) - inputRDD.mapPartitionsInternal { iter => - val context = TaskContext.get() + // Setting window bounds for each window frame. Each window frame has different bounds, so + // each has its own window bound columns. + val windowBoundsInput = factories.indices.flatMap { frameIndex => + if (isBounded(frameIndex)) { + Seq( + BoundReference(lowerBoundIndex(frameIndex), IntegerType, nullable = false), + BoundReference(upperBoundIndex(frameIndex), IntegerType, nullable = false) + ) + } else { + Seq.empty + } + } - val grouped = if (partitionSpec.isEmpty) { - // Use an empty unsafe row as a place holder for the grouping key - Iterator((new UnsafeRow(), iter)) + // Setting the window bounds argOffset for each UDF. For UDFs with a bounded window, the argOffset + // for the UDF is (lowerBoundOffset, upperBoundOffset, inputOffset1, inputOffset2, ...). + // For UDFs with an unbounded window, the argOffset is (inputOffset1, inputOffset2, ...). + pyFuncs.indices.foreach { exprIndex => + val frameIndex = expressionIndexToFrameIndex(exprIndex) + if (isBounded(frameIndex)) { + argOffsets(exprIndex) = + Array(lowerBoundIndex(frameIndex), upperBoundIndex(frameIndex)) ++ + argOffsets(exprIndex).map(_ + windowBoundsInput.length) } else { - GroupedIterator(iter, partitionSpec, child.output) + argOffsets(exprIndex) = argOffsets(exprIndex).map(_ + windowBoundsInput.length) } + } + + val allInputs = windowBoundsInput ++ dataInputs + val allInputTypes = allInputs.map(_.dataType) + + // Start processing. + child.execute().mapPartitions { iter => + val context = TaskContext.get() + + // Get all relevant projections.
+ val resultProj = createResultProjection(expressions) + val pythonInputProj = UnsafeProjection.create( + allInputs, + windowBoundsInput.map(ref => + AttributeReference(s"i_${ref.ordinal}", ref.dataType)()) ++ child.output + ) + val pythonInputSchema = StructType( + allInputTypes.zipWithIndex.map { case (dt, i) => + StructField(s"_$i", dt) + } + ) + val grouping = UnsafeProjection.create(partitionSpec, child.output) // The queue used to buffer input rows so we can drain it to // combine input with output from Python. @@ -144,11 +293,94 @@ case class WindowInPandasExec( queue.close() } - val inputProj = UnsafeProjection.create(allInputs, child.output) - val pythonInput = grouped.map { case (_, rows) => - rows.map { row => - queue.add(row.asInstanceOf[UnsafeRow]) - inputProj(row) + val stream = iter.map { row => + queue.add(row.asInstanceOf[UnsafeRow]) + row + } + + val pythonInput = new Iterator[Iterator[UnsafeRow]] { + + // Manage the stream and the grouping. + var nextRow: UnsafeRow = null + var nextGroup: UnsafeRow = null + var nextRowAvailable: Boolean = false + private[this] def fetchNextRow() { + nextRowAvailable = stream.hasNext + if (nextRowAvailable) { + nextRow = stream.next().asInstanceOf[UnsafeRow] + nextGroup = grouping(nextRow) + } else { + nextRow = null + nextGroup = null + } + } + fetchNextRow() + + // Manage the current partition. + val buffer: ExternalAppendOnlyUnsafeRowArray = + new ExternalAppendOnlyUnsafeRowArray(inMemoryThreshold, spillThreshold) + var bufferIterator: Iterator[UnsafeRow] = _ + + val indexRow = new SpecificInternalRow(Array.fill(numBoundIndices)(IntegerType)) + + val frames = factories.map(_(indexRow)) + + private[this] def fetchNextPartition() { + // Collect all the rows in the current partition. + // Before we start to fetch new input rows, make a copy of nextGroup. + val currentGroup = nextGroup.copy() + + // clear last partition + buffer.clear() + + while (nextRowAvailable && nextGroup == currentGroup) { + buffer.add(nextRow) + fetchNextRow() + } + + // Setup the frames. + var i = 0 + while (i < numFrames) { + frames(i).prepare(buffer) + i += 1 + } + + // Setup iteration + rowIndex = 0 + bufferIterator = buffer.generateIterator() + } + + // Iteration + var rowIndex = 0 + + override final def hasNext: Boolean = + (bufferIterator != null && bufferIterator.hasNext) || nextRowAvailable + + override final def next(): Iterator[UnsafeRow] = { + // Load the next partition if we need to. + if ((bufferIterator == null || !bufferIterator.hasNext) && nextRowAvailable) { + fetchNextPartition() + } + + val join = new JoinedRow + + bufferIterator.zipWithIndex.map { + case (current, index) => + var frameIndex = 0 + while (frameIndex < numFrames) { + frames(frameIndex).write(index, current) + // If the window is unbounded we don't need to write out window bounds. 
+ if (isBounded(frameIndex)) { + indexRow.setInt( + lowerBoundIndex(frameIndex), frames(frameIndex).currentLowerBound()) + indexRow.setInt( + upperBoundIndex(frameIndex), frames(frameIndex).currentUpperBound()) + } + frameIndex += 1 + } + + pythonInputProj(join(indexRow, current)) + } } } @@ -156,12 +388,11 @@ case class WindowInPandasExec( pyFuncs, PythonEvalType.SQL_WINDOW_AGG_PANDAS_UDF, argOffsets, - windowInputSchema, + pythonInputSchema, sessionLocalTimeZone, pythonRunnerConf).compute(pythonInput, context.partitionId(), context) val joined = new JoinedRow - val resultProj = createResultProjection(expressions) windowFunctionResult.flatMap(_.rowIterator.asScala).map { windowOutput => val leftRow = queue.remove() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index 729b8bdb3dae..89f6edda2ef5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -83,7 +83,7 @@ case class WindowExec( partitionSpec: Seq[Expression], orderSpec: Seq[SortOrder], child: SparkPlan) - extends UnaryExecNode { + extends WindowExecBase(windowExpression, partitionSpec, orderSpec, child) { override def output: Seq[Attribute] = child.output ++ windowExpression.map(_.toAttribute) @@ -104,193 +104,6 @@ case class WindowExec( override def outputPartitioning: Partitioning = child.outputPartitioning - /** - * Create a bound ordering object for a given frame type and offset. A bound ordering object is - * used to determine which input row lies within the frame boundaries of an output row. - * - * This method uses Code Generation. It can only be used on the executor side. - * - * @param frame to evaluate. This can either be a Row or Range frame. - * @param bound with respect to the row. - * @param timeZone the session local timezone for time related calculations. - * @return a bound ordering object. - */ - private[this] def createBoundOrdering( - frame: FrameType, bound: Expression, timeZone: String): BoundOrdering = { - (frame, bound) match { - case (RowFrame, CurrentRow) => - RowBoundOrdering(0) - - case (RowFrame, IntegerLiteral(offset)) => - RowBoundOrdering(offset) - - case (RangeFrame, CurrentRow) => - val ordering = newOrdering(orderSpec, child.output) - RangeBoundOrdering(ordering, IdentityProjection, IdentityProjection) - - case (RangeFrame, offset: Expression) if orderSpec.size == 1 => - // Use only the first order expression when the offset is non-null. - val sortExpr = orderSpec.head - val expr = sortExpr.child - - // Create the projection which returns the current 'value'. - val current = newMutableProjection(expr :: Nil, child.output) - - // Flip the sign of the offset when processing the order is descending - val boundOffset = sortExpr.direction match { - case Descending => UnaryMinus(offset) - case Ascending => offset - } - - // Create the projection which returns the current 'value' modified by adding the offset. - val boundExpr = (expr.dataType, boundOffset.dataType) match { - case (DateType, IntegerType) => DateAdd(expr, boundOffset) - case (TimestampType, CalendarIntervalType) => - TimeAdd(expr, boundOffset, Some(timeZone)) - case (a, b) if a== b => Add(expr, boundOffset) - } - val bound = newMutableProjection(boundExpr :: Nil, child.output) - - // Construct the ordering. 
This is used to compare the result of current value projection - // to the result of bound value projection. This is done manually because we want to use - // Code Generation (if it is enabled). - val boundSortExprs = sortExpr.copy(BoundReference(0, expr.dataType, expr.nullable)) :: Nil - val ordering = newOrdering(boundSortExprs, Nil) - RangeBoundOrdering(ordering, current, bound) - - case (RangeFrame, _) => - sys.error("Non-Zero range offsets are not supported for windows " + - "with multiple order expressions.") - } - } - - /** - * Collection containing an entry for each window frame to process. Each entry contains a frame's - * [[WindowExpression]]s and factory function for the WindowFrameFunction. - */ - private[this] lazy val windowFrameExpressionFactoryPairs = { - type FrameKey = (String, FrameType, Expression, Expression) - type ExpressionBuffer = mutable.Buffer[Expression] - val framedFunctions = mutable.Map.empty[FrameKey, (ExpressionBuffer, ExpressionBuffer)] - - // Add a function and its function to the map for a given frame. - def collect(tpe: String, fr: SpecifiedWindowFrame, e: Expression, fn: Expression): Unit = { - val key = (tpe, fr.frameType, fr.lower, fr.upper) - val (es, fns) = framedFunctions.getOrElseUpdate( - key, (ArrayBuffer.empty[Expression], ArrayBuffer.empty[Expression])) - es += e - fns += fn - } - - // Collect all valid window functions and group them by their frame. - windowExpression.foreach { x => - x.foreach { - case e @ WindowExpression(function, spec) => - val frame = spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame] - function match { - case AggregateExpression(f, _, _, _) => collect("AGGREGATE", frame, e, f) - case f: AggregateWindowFunction => collect("AGGREGATE", frame, e, f) - case f: OffsetWindowFunction => collect("OFFSET", frame, e, f) - case f => sys.error(s"Unsupported window function: $f") - } - case _ => - } - } - - // Map the groups to a (unbound) expression and frame factory pair. - var numExpressions = 0 - val timeZone = conf.sessionLocalTimeZone - framedFunctions.toSeq.map { - case (key, (expressions, functionSeq)) => - val ordinal = numExpressions - val functions = functionSeq.toArray - - // Construct an aggregate processor if we need one. - def processor = AggregateProcessor( - functions, - ordinal, - child.output, - (expressions, schema) => - newMutableProjection(expressions, schema, subexpressionEliminationEnabled)) - - // Create the factory - val factory = key match { - // Offset Frame - case ("OFFSET", _, IntegerLiteral(offset), _) => - target: InternalRow => - new OffsetWindowFunctionFrame( - target, - ordinal, - // OFFSET frame functions are guaranteed be OffsetWindowFunctions. - functions.map(_.asInstanceOf[OffsetWindowFunction]), - child.output, - (expressions, schema) => - newMutableProjection(expressions, schema, subexpressionEliminationEnabled), - offset) - - // Entire Partition Frame. - case ("AGGREGATE", _, UnboundedPreceding, UnboundedFollowing) => - target: InternalRow => { - new UnboundedWindowFunctionFrame(target, processor) - } - - // Growing Frame. - case ("AGGREGATE", frameType, UnboundedPreceding, upper) => - target: InternalRow => { - new UnboundedPrecedingWindowFunctionFrame( - target, - processor, - createBoundOrdering(frameType, upper, timeZone)) - } - - // Shrinking Frame. - case ("AGGREGATE", frameType, lower, UnboundedFollowing) => - target: InternalRow => { - new UnboundedFollowingWindowFunctionFrame( - target, - processor, - createBoundOrdering(frameType, lower, timeZone)) - } - - // Moving Frame. 
- case ("AGGREGATE", frameType, lower, upper) => - target: InternalRow => { - new SlidingWindowFunctionFrame( - target, - processor, - createBoundOrdering(frameType, lower, timeZone), - createBoundOrdering(frameType, upper, timeZone)) - } - } - - // Keep track of the number of expressions. This is a side-effect in a map... - numExpressions += expressions.size - - // Create the Frame Expression - Factory pair. - (expressions, factory) - } - } - - /** - * Create the resulting projection. - * - * This method uses Code Generation. It can only be used on the executor side. - * - * @param expressions unbound ordered function expressions. - * @return the final resulting projection. - */ - private[this] def createResultProjection(expressions: Seq[Expression]): UnsafeProjection = { - val references = expressions.zipWithIndex.map{ case (e, i) => - // Results of window expressions will be on the right side of child's output - BoundReference(child.output.size + i, e.dataType, e.nullable) - } - val unboundToRefMap = expressions.zip(references).toMap - val patchedWindowExpression = windowExpression.map(_.transform(unboundToRefMap)) - UnsafeProjection.create( - child.output ++ patchedWindowExpression, - child.output) - } - protected override def doExecute(): RDD[InternalRow] = { // Unwrap the expressions and factories from the map. val expressions = windowFrameExpressionFactoryPairs.flatMap(_._1) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala new file mode 100644 index 000000000000..dcb86f48bdf3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.window + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, TimestampType} + +abstract class WindowExecBase( + windowExpression: Seq[NamedExpression], + partitionSpec: Seq[Expression], + orderSpec: Seq[SortOrder], + child: SparkPlan) extends UnaryExecNode { + + /** + * Create the resulting projection. + * + * This method uses Code Generation. It can only be used on the executor side. + * + * @param expressions unbound ordered function expressions. + * @return the final resulting projection. 
+ */ + protected def createResultProjection(expressions: Seq[Expression]): UnsafeProjection = { + val references = expressions.zipWithIndex.map { case (e, i) => + // Results of window expressions will be on the right side of child's output + BoundReference(child.output.size + i, e.dataType, e.nullable) + } + val unboundToRefMap = expressions.zip(references).toMap + val patchedWindowExpression = windowExpression.map(_.transform(unboundToRefMap)) + UnsafeProjection.create( + child.output ++ patchedWindowExpression, + child.output) + } + + /** + * Create a bound ordering object for a given frame type and offset. A bound ordering object is + * used to determine which input row lies within the frame boundaries of an output row. + * + * This method uses Code Generation. It can only be used on the executor side. + * + * @param frame to evaluate. This can either be a Row or Range frame. + * @param bound with respect to the row. + * @param timeZone the session local timezone for time related calculations. + * @return a bound ordering object. + */ + private def createBoundOrdering( + frame: FrameType, bound: Expression, timeZone: String): BoundOrdering = { + (frame, bound) match { + case (RowFrame, CurrentRow) => + RowBoundOrdering(0) + + case (RowFrame, IntegerLiteral(offset)) => + RowBoundOrdering(offset) + + case (RangeFrame, CurrentRow) => + val ordering = newOrdering(orderSpec, child.output) + RangeBoundOrdering(ordering, IdentityProjection, IdentityProjection) + + case (RangeFrame, offset: Expression) if orderSpec.size == 1 => + // Use only the first order expression when the offset is non-null. + val sortExpr = orderSpec.head + val expr = sortExpr.child + + // Create the projection which returns the current 'value'. + val current = newMutableProjection(expr :: Nil, child.output) + + // Flip the sign of the offset when processing the order is descending + val boundOffset = sortExpr.direction match { + case Descending => UnaryMinus(offset) + case Ascending => offset + } + + // Create the projection which returns the current 'value' modified by adding the offset. + val boundExpr = (expr.dataType, boundOffset.dataType) match { + case (DateType, IntegerType) => DateAdd(expr, boundOffset) + case (TimestampType, CalendarIntervalType) => + TimeAdd(expr, boundOffset, Some(timeZone)) + case (a, b) if a == b => Add(expr, boundOffset) + } + val bound = newMutableProjection(boundExpr :: Nil, child.output) + + // Construct the ordering. This is used to compare the result of current value projection + // to the result of bound value projection. This is done manually because we want to use + // Code Generation (if it is enabled). + val boundSortExprs = sortExpr.copy(BoundReference(0, expr.dataType, expr.nullable)) :: Nil + val ordering = newOrdering(boundSortExprs, Nil) + RangeBoundOrdering(ordering, current, bound) + + case (RangeFrame, _) => + sys.error("Non-Zero range offsets are not supported for windows " + + "with multiple order expressions.") + } + } + + /** + * Collection containing an entry for each window frame to process. Each entry contains a frame's + * [[WindowExpression]]s and factory function for the WindowFrameFunction. + */ + protected lazy val windowFrameExpressionFactoryPairs = { + type FrameKey = (String, FrameType, Expression, Expression) + type ExpressionBuffer = mutable.Buffer[Expression] + val framedFunctions = mutable.Map.empty[FrameKey, (ExpressionBuffer, ExpressionBuffer)] + + // Add a function and its function to the map for a given frame. 
+ def collect(tpe: String, fr: SpecifiedWindowFrame, e: Expression, fn: Expression): Unit = { + val key = (tpe, fr.frameType, fr.lower, fr.upper) + val (es, fns) = framedFunctions.getOrElseUpdate( + key, (ArrayBuffer.empty[Expression], ArrayBuffer.empty[Expression])) + es += e + fns += fn + } + + // Collect all valid window functions and group them by their frame. + windowExpression.foreach { x => + x.foreach { + case e @ WindowExpression(function, spec) => + val frame = spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame] + function match { + case AggregateExpression(f, _, _, _) => collect("AGGREGATE", frame, e, f) + case f: AggregateWindowFunction => collect("AGGREGATE", frame, e, f) + case f: OffsetWindowFunction => collect("OFFSET", frame, e, f) + case f: PythonUDF => collect("AGGREGATE", frame, e, f) + case f => sys.error(s"Unsupported window function: $f") + } + case _ => + } + } + + // Map the groups to a (unbound) expression and frame factory pair. + var numExpressions = 0 + val timeZone = conf.sessionLocalTimeZone + framedFunctions.toSeq.map { + case (key, (expressions, functionSeq)) => + val ordinal = numExpressions + val functions = functionSeq.toArray + + // Construct an aggregate processor if we need one. + // Currently we don't allow mixing of Pandas UDF and SQL aggregation functions + // in a single Window physical node. Therefore, we can assume no SQL aggregation + // functions if Pandas UDF exists. In the future, we might mix Pandas UDF and SQL + // aggregation function in a single physical node. + def processor = if (functions.exists(_.isInstanceOf[PythonUDF])) { + null + } else { + AggregateProcessor( + functions, + ordinal, + child.output, + (expressions, schema) => + newMutableProjection(expressions, schema, subexpressionEliminationEnabled)) + } + + // Create the factory + val factory = key match { + // Offset Frame + case ("OFFSET", _, IntegerLiteral(offset), _) => + target: InternalRow => + new OffsetWindowFunctionFrame( + target, + ordinal, + // OFFSET frame functions are guaranteed be OffsetWindowFunctions. + functions.map(_.asInstanceOf[OffsetWindowFunction]), + child.output, + (expressions, schema) => + newMutableProjection(expressions, schema, subexpressionEliminationEnabled), + offset) + + // Entire Partition Frame. + case ("AGGREGATE", _, UnboundedPreceding, UnboundedFollowing) => + target: InternalRow => { + new UnboundedWindowFunctionFrame(target, processor) + } + + // Growing Frame. + case ("AGGREGATE", frameType, UnboundedPreceding, upper) => + target: InternalRow => { + new UnboundedPrecedingWindowFunctionFrame( + target, + processor, + createBoundOrdering(frameType, upper, timeZone)) + } + + // Shrinking Frame. + case ("AGGREGATE", frameType, lower, UnboundedFollowing) => + target: InternalRow => { + new UnboundedFollowingWindowFunctionFrame( + target, + processor, + createBoundOrdering(frameType, lower, timeZone)) + } + + // Moving Frame. + case ("AGGREGATE", frameType, lower, upper) => + target: InternalRow => { + new SlidingWindowFunctionFrame( + target, + processor, + createBoundOrdering(frameType, lower, timeZone), + createBoundOrdering(frameType, upper, timeZone)) + } + } + + // Keep track of the number of expressions. This is a side-effect in a map... + numExpressions += expressions.size + + // Create the Frame Expression - Factory pair. 
+ (expressions, factory) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala index 156002ef58fb..a5601899ea2d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray * Before use a frame must be prepared by passing it all the rows in the current partition. After * preparation the update method can be called to fill the output rows. */ -private[window] abstract class WindowFunctionFrame { +abstract class WindowFunctionFrame { /** * Prepare the frame for calculating the results for a partition. * @@ -42,6 +42,20 @@ private[window] abstract class WindowFunctionFrame { * Write the current results to the target row. */ def write(index: Int, current: InternalRow): Unit + + /** + * The current lower window bound in the row array (inclusive). + * + * This should be called after the current row is updated via [[write]] + */ + def currentLowerBound(): Int + + /** + * The current row index of the upper window bound in the row array (exclusive) + * + * This should be called after the current row is updated via [[write]] + */ + def currentUpperBound(): Int } object WindowFunctionFrame { @@ -62,7 +76,7 @@ object WindowFunctionFrame { * @param newMutableProjection function used to create the projection. * @param offset by which rows get moved within a partition. */ -private[window] final class OffsetWindowFunctionFrame( +final class OffsetWindowFunctionFrame( target: InternalRow, ordinal: Int, expressions: Array[OffsetWindowFunction], @@ -137,6 +151,10 @@ private[window] final class OffsetWindowFunctionFrame( } inputIndex += 1 } + + override def currentLowerBound(): Int = throw new UnsupportedOperationException() + + override def currentUpperBound(): Int = throw new UnsupportedOperationException() } /** @@ -148,7 +166,7 @@ private[window] final class OffsetWindowFunctionFrame( * @param lbound comparator used to identify the lower bound of an output row. * @param ubound comparator used to identify the upper bound of an output row. */ -private[window] final class SlidingWindowFunctionFrame( +final class SlidingWindowFunctionFrame( target: InternalRow, processor: AggregateProcessor, lbound: BoundOrdering, @@ -170,24 +188,24 @@ private[window] final class SlidingWindowFunctionFrame( private[this] val buffer = new util.ArrayDeque[InternalRow]() /** - * Index of the first input row with a value greater than the upper bound of the current - * output row. + * Index of the first input row with a value equal to or greater than the lower bound of the + * current output row. */ - private[this] var inputHighIndex = 0 + private[this] var lowerBound = 0 /** - * Index of the first input row with a value equal to or greater than the lower bound of the - * current output row. + * Index of the first input row with a value greater than the upper bound of the current + * output row. */ - private[this] var inputLowIndex = 0 + private[this] var upperBound = 0 /** Prepare the frame for calculating a new partition. Reset all variables. 
*/ override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { input = rows inputIterator = input.generateIterator() nextRow = WindowFunctionFrame.getNextOrNull(inputIterator) - inputHighIndex = 0 - inputLowIndex = 0 + lowerBound = 0 + upperBound = 0 buffer.clear() } @@ -197,27 +215,27 @@ private[window] final class SlidingWindowFunctionFrame( // Drop all rows from the buffer for which the input row value is smaller than // the output row lower bound. - while (!buffer.isEmpty && lbound.compare(buffer.peek(), inputLowIndex, current, index) < 0) { + while (!buffer.isEmpty && lbound.compare(buffer.peek(), lowerBound, current, index) < 0) { buffer.remove() - inputLowIndex += 1 + lowerBound += 1 bufferUpdated = true } // Add all rows to the buffer for which the input row value is equal to or less than // the output row upper bound. - while (nextRow != null && ubound.compare(nextRow, inputHighIndex, current, index) <= 0) { - if (lbound.compare(nextRow, inputLowIndex, current, index) < 0) { - inputLowIndex += 1 + while (nextRow != null && ubound.compare(nextRow, upperBound, current, index) <= 0) { + if (lbound.compare(nextRow, lowerBound, current, index) < 0) { + lowerBound += 1 } else { buffer.add(nextRow.copy()) bufferUpdated = true } nextRow = WindowFunctionFrame.getNextOrNull(inputIterator) - inputHighIndex += 1 + upperBound += 1 } // Only recalculate and update when the buffer changes. - if (bufferUpdated) { + if (processor != null && bufferUpdated) { processor.initialize(input.length) val iter = buffer.iterator() while (iter.hasNext) { @@ -226,6 +244,10 @@ private[window] final class SlidingWindowFunctionFrame( processor.evaluate(target) } } + + override def currentLowerBound(): Int = lowerBound + + override def currentUpperBound(): Int = upperBound } /** @@ -239,27 +261,39 @@ private[window] final class SlidingWindowFunctionFrame( * @param target to write results to. * @param processor to calculate the row values with. */ -private[window] final class UnboundedWindowFunctionFrame( +final class UnboundedWindowFunctionFrame( target: InternalRow, processor: AggregateProcessor) extends WindowFunctionFrame { + val lowerBound: Int = 0 + var upperBound: Int = 0 + /** Prepare the frame for calculating a new partition. Process all rows eagerly. */ override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { - processor.initialize(rows.length) - - val iterator = rows.generateIterator() - while (iterator.hasNext) { - processor.update(iterator.next()) + if (processor != null) { + processor.initialize(rows.length) + val iterator = rows.generateIterator() + while (iterator.hasNext) { + processor.update(iterator.next()) + } } + + upperBound = rows.length } /** Write the frame columns for the current row to the given target row. */ override def write(index: Int, current: InternalRow): Unit = { // Unfortunately we cannot assume that evaluation is deterministic. So we need to re-evaluate // for each row. - processor.evaluate(target) + if (processor != null) { + processor.evaluate(target) + } } + + override def currentLowerBound(): Int = lowerBound + + override def currentUpperBound(): Int = upperBound } /** @@ -276,7 +310,7 @@ private[window] final class UnboundedWindowFunctionFrame( * @param processor to calculate the row values with. * @param ubound comparator used to identify the upper bound of an output row. 
*/ -private[window] final class UnboundedPrecedingWindowFunctionFrame( +final class UnboundedPrecedingWindowFunctionFrame( target: InternalRow, processor: AggregateProcessor, ubound: BoundOrdering) @@ -308,7 +342,9 @@ private[window] final class UnboundedPrecedingWindowFunctionFrame( nextRow = inputIterator.next() } - processor.initialize(input.length) + if (processor != null) { + processor.initialize(input.length) + } } /** Write the frame columns for the current row to the given target row. */ @@ -318,17 +354,23 @@ private[window] final class UnboundedPrecedingWindowFunctionFrame( // Add all rows to the aggregates for which the input row value is equal to or less than // the output row upper bound. while (nextRow != null && ubound.compare(nextRow, inputIndex, current, index) <= 0) { - processor.update(nextRow) + if (processor != null) { + processor.update(nextRow) + } nextRow = WindowFunctionFrame.getNextOrNull(inputIterator) inputIndex += 1 bufferUpdated = true } // Only recalculate and update when the buffer changes. - if (bufferUpdated) { + if (processor != null && bufferUpdated) { processor.evaluate(target) } } + + override def currentLowerBound(): Int = 0 + + override def currentUpperBound(): Int = inputIndex } /** @@ -347,7 +389,7 @@ private[window] final class UnboundedPrecedingWindowFunctionFrame( * @param processor to calculate the row values with. * @param lbound comparator used to identify the lower bound of an output row. */ -private[window] final class UnboundedFollowingWindowFunctionFrame( +final class UnboundedFollowingWindowFunctionFrame( target: InternalRow, processor: AggregateProcessor, lbound: BoundOrdering) @@ -384,7 +426,7 @@ private[window] final class UnboundedFollowingWindowFunctionFrame( } // Only recalculate and update when the buffer changes. - if (bufferUpdated) { + if (processor != null && bufferUpdated) { processor.initialize(input.length) if (nextRow != null) { processor.update(nextRow) @@ -395,4 +437,8 @@ private[window] final class UnboundedFollowingWindowFunctionFrame( processor.evaluate(target) } } + + override def currentLowerBound(): Int = inputIndex + + override def currentUpperBound(): Int = input.length } From d72571e51d8b41e2287750759e120547afeeb7d7 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 18 Dec 2018 13:50:55 +0800 Subject: [PATCH 0157/1072] [SPARK-26246][SQL] Inferring TimestampType from JSON ## What changes were proposed in this pull request? The `JsonInferSchema` class is extended to support `TimestampType` inferring from string fields in JSON input: - If the `prefersDecimal` option is set to `true`, it tries to infer decimal type from the string field. - If decimal type inference fails or `prefersDecimal` is disabled, `JsonInferSchema` tries to infer `TimestampType`. - If timestamp type inference fails, `StringType` is returned as the inferred type. ## How was this patch tested? Added new test suite - `JsonInferSchemaSuite` to check date and timestamp types inferring from JSON using `JsonInferSchema` directly. A few tests were added `JsonSuite` to check type merging and roundtrip tests. This changes was tested by `JsonSuite`, `JsonExpressionsSuite` and `JsonFunctionsSuite` as well. Closes #23201 from MaxGekk/json-infer-time. 
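For example (illustration only, not taken from the test suites above; the sample records and the `ts` field name are made up), the new inference can be exercised from a `spark-shell` session roughly as follows:

```scala
import spark.implicits._

// A string field that matches the timestampFormat pattern should now be inferred as timestamp.
spark.read
  .option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSS")
  .json(Seq("""{"ts": "2018-12-02T21:04:00.123"}""").toDS())
  .printSchema()
// expected: root |-- ts: timestamp (nullable = true)

// A value that does not parse with the pattern still falls back to string.
spark.read
  .option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSS")
  .json(Seq("""{"ts": "not a timestamp"}""").toDS())
  .printSchema()
// expected: root |-- ts: string (nullable = true)
```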
Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/json/JsonInferSchema.scala | 22 +++- .../catalyst/json/JsonInferSchemaSuite.scala | 102 ++++++++++++++++++ .../datasources/json/JsonSuite.scala | 52 +++++++++ 3 files changed, 171 insertions(+), 5 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index 263e05de3207..d1bc00c08c1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -28,7 +28,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.analysis.TypeCoercion import org.apache.spark.sql.catalyst.expressions.ExprUtils import org.apache.spark.sql.catalyst.json.JacksonUtils.nextUntil -import org.apache.spark.sql.catalyst.util.{DropMalformedMode, FailFastMode, ParseMode, PermissiveMode} +import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -37,6 +37,12 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { private val decimalParser = ExprUtils.getDecimalParser(options.locale) + @transient + private lazy val timestampFormatter = TimestampFormatter( + options.timestampFormat, + options.timeZone, + options.locale) + /** * Infer the type of a collection of json records in three stages: * 1. Infer the type of each record @@ -115,13 +121,19 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { // record fields' types have been combined. NullType - case VALUE_STRING if options.prefersDecimal => + case VALUE_STRING => + val field = parser.getText val decimalTry = allCatch opt { - val bigDecimal = decimalParser(parser.getText) + val bigDecimal = decimalParser(field) DecimalType(bigDecimal.precision, bigDecimal.scale) } - decimalTry.getOrElse(StringType) - case VALUE_STRING => StringType + if (options.prefersDecimal && decimalTry.isDefined) { + decimalTry.get + } else if ((allCatch opt timestampFormatter.parse(field)).isDefined) { + TimestampType + } else { + StringType + } case START_OBJECT => val builder = Array.newBuilder[StructField] diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala new file mode 100644 index 000000000000..9307f9b47b80 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.json + +import com.fasterxml.jackson.core.JsonFactory + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper { + + def checkType(options: Map[String, String], json: String, dt: DataType): Unit = { + val jsonOptions = new JSONOptions(options, "UTC", "") + val inferSchema = new JsonInferSchema(jsonOptions) + val factory = new JsonFactory() + jsonOptions.setJacksonOptions(factory) + val parser = CreateJacksonParser.string(factory, json) + parser.nextToken() + val expectedType = StructType(Seq(StructField("a", dt, true))) + + assert(inferSchema.inferField(parser) === expectedType) + } + + def checkTimestampType(pattern: String, json: String): Unit = { + checkType(Map("timestampFormat" -> pattern), json, TimestampType) + } + + test("inferring timestamp type") { + Seq(true, false).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + checkTimestampType("yyyy", """{"a": "2018"}""") + checkTimestampType("yyyy=MM", """{"a": "2018=12"}""") + checkTimestampType("yyyy MM dd", """{"a": "2018 12 02"}""") + checkTimestampType( + "yyyy-MM-dd'T'HH:mm:ss.SSS", + """{"a": "2018-12-02T21:04:00.123"}""") + checkTimestampType( + "yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX", + """{"a": "2018-12-02T21:04:00.123567+01:00"}""") + } + } + } + + test("prefer decimals over timestamps") { + Seq(true, false).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + checkType( + options = Map( + "prefersDecimal" -> "true", + "timestampFormat" -> "yyyyMMdd.HHmmssSSS" + ), + json = """{"a": "20181202.210400123"}""", + dt = DecimalType(17, 9) + ) + } + } + } + + test("skip decimal type inferring") { + Seq(true, false).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + checkType( + options = Map( + "prefersDecimal" -> "false", + "timestampFormat" -> "yyyyMMdd.HHmmssSSS" + ), + json = """{"a": "20181202.210400123"}""", + dt = TimestampType + ) + } + } + } + + test("fallback to string type") { + Seq(true, false).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + checkType( + options = Map("timestampFormat" -> "yyyy,MM,dd.HHmmssSSS"), + json = """{"a": "20181202.210400123"}""", + dt = StringType + ) + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 786335b42e3c..8f575a371c98 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.StructType.fromDDL import org.apache.spark.util.Utils class TestFileFilter extends PathFilter { @@ -2589,4 +2590,55 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { Row(null, Array(0, 1, 2), "abc", """{"a":"-","b":[0, 1, 2],"c":"abc"}""") :: Row(0.1, null, "def", """{"a":0.1,"b":{},"c":"def"}""") :: Nil) } + + test("inferring timestamp type") { + Seq(true, false).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + def schemaOf(jsons: String*): StructType = spark.read.json(jsons.toDS).schema + + assert(schemaOf( + """{"a":"2018-12-17T10:11:12.123-01:00"}""", + """{"a":"2018-12-16T22:23:24.123-02:00"}""") === fromDDL("a timestamp")) + + assert(schemaOf("""{"a":"2018-12-17T10:11:12.123-01:00"}""", """{"a":1}""") + === fromDDL("a string")) + assert(schemaOf("""{"a":"2018-12-17T10:11:12.123-01:00"}""", """{"a":"123"}""") + === fromDDL("a string")) + + assert(schemaOf("""{"a":"2018-12-17T10:11:12.123-01:00"}""", """{"a":null}""") + === fromDDL("a timestamp")) + assert(schemaOf("""{"a":null}""", """{"a":"2018-12-17T10:11:12.123-01:00"}""") + === fromDDL("a timestamp")) + } + } + } + + test("roundtrip for timestamp type inferring") { + Seq(true, false).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + val customSchema = new StructType().add("date", TimestampType) + withTempDir { dir => + val timestampsWithFormatPath = s"${dir.getCanonicalPath}/timestampsWithFormat.json" + val timestampsWithFormat = spark.read + .option("timestampFormat", "dd/MM/yyyy HH:mm") + .json(datesRecords) + assert(timestampsWithFormat.schema === customSchema) + + timestampsWithFormat.write + .format("json") + .option("timestampFormat", "yyyy-MM-dd HH:mm:ss") + .option(DateTimeUtils.TIMEZONE_OPTION, "UTC") + .save(timestampsWithFormatPath) + + val readBack = spark.read + .option("timestampFormat", "yyyy-MM-dd HH:mm:ss") + .option(DateTimeUtils.TIMEZONE_OPTION, "UTC") + .json(timestampsWithFormatPath) + + assert(readBack.schema === customSchema) + checkAnswer(readBack, timestampsWithFormat) + } + } + } + } } From 218341c5db62bf5363c4a16440fa742970f1e919 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 18 Dec 2018 20:52:02 +0800 Subject: [PATCH 0158/1072] [SPARK-26081][SQL][FOLLOW-UP] Use foreach instead of misuse of map (for Unit) ## What changes were proposed in this pull request? This PR proposes to use foreach instead of misuse of map (for Unit). This could cause some weird errors potentially and it's not a good practice anyway. See also SPARK-16694 ## How was this patch tested? N/A Closes #23341 from HyukjinKwon/followup-SPARK-26081. 
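A minimal sketch of the difference (the `writer` value below is a made-up stand-in for the optional generators and streams touched by this patch):

```scala
val writer: Option[java.io.Closeable] = Some(new java.io.Closeable {
  override def close(): Unit = println("closed")
})

// Misuse: map is meant for transformations, so this allocates a throwaway Option[Unit]
// and makes the side effect look like a value being computed.
val ignored: Option[Unit] = writer.map(_.close())

// Intended: foreach expresses "run this side effect if a value is present" and returns Unit.
writer.foreach(_.close())
```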
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .../spark/sql/execution/datasources/csv/CSVFileFormat.scala | 2 +- .../spark/sql/execution/datasources/json/JsonFileFormat.scala | 2 +- .../spark/sql/execution/datasources/text/TextFileFormat.scala | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala index f7d8a9e1042d..f4f139d18005 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala @@ -189,5 +189,5 @@ private[csv] class CsvOutputWriter( gen.write(row) } - override def close(): Unit = univocityGenerator.map(_.close()) + override def close(): Unit = univocityGenerator.foreach(_.close()) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala index 3042133ee43a..40f55e706801 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala @@ -190,5 +190,5 @@ private[json] class JsonOutputWriter( gen.writeLineEnding() } - override def close(): Unit = jacksonGenerator.map(_.close()) + override def close(): Unit = jacksonGenerator.foreach(_.close()) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala index 01948ab25d63..0607f7b3c0d4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala @@ -153,7 +153,7 @@ class TextOutputWriter( private var outputStream: Option[OutputStream] = None override def write(row: InternalRow): Unit = { - val os = outputStream.getOrElse{ + val os = outputStream.getOrElse { val newStream = CodecStreams.createOutputStream(context, new Path(path)) outputStream = Some(newStream) newStream @@ -167,6 +167,6 @@ class TextOutputWriter( } override def close(): Unit = { - outputStream.map(_.close()) + outputStream.foreach(_.close()) } } From 4d693ac904d89b3afeba107eb0480120daf78174 Mon Sep 17 00:00:00 2001 From: Stan Zhai Date: Tue, 18 Dec 2018 07:02:09 -0600 Subject: [PATCH 0159/1072] [SPARK-24680][DEPLOY] Support spark.executorEnv.JAVA_HOME in Standalone mode ## What changes were proposed in this pull request? spark.executorEnv.JAVA_HOME does not take effect when a Worker starting an Executor process in Standalone mode. This PR fixed this. ## How was this patch tested? Manual tests. Closes #21663 from stanzhai/fix-executor-env-java-home. 
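For example (a hypothetical setup; the master URL and JDK path are invented for this sketch), an application can now pin the executors' JDK on a standalone cluster like this:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

val conf = new SparkConf()
  .setMaster("spark://master:7077")
  .setAppName("executor-java-home-demo")
  // With this change, the Worker uses this JAVA_HOME when launching the Executor process
  // instead of falling back to its own JAVA_HOME or java.home.
  .set("spark.executorEnv.JAVA_HOME", "/opt/jdk-8u192")

val spark = SparkSession.builder().config(conf).getOrCreate()
```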
Lead-authored-by: Stan Zhai Co-authored-by: Stan Zhai Signed-off-by: Sean Owen --- .../spark/launcher/AbstractCommandBuilder.java | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java index ce24400f557c..56edceb17bfb 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java @@ -91,14 +91,18 @@ abstract List buildCommand(Map env) */ List buildJavaCommand(String extraClassPath) throws IOException { List cmd = new ArrayList<>(); - String envJavaHome; - if (javaHome != null) { - cmd.add(join(File.separator, javaHome, "bin", "java")); - } else if ((envJavaHome = System.getenv("JAVA_HOME")) != null) { - cmd.add(join(File.separator, envJavaHome, "bin", "java")); - } else { - cmd.add(join(File.separator, System.getProperty("java.home"), "bin", "java")); + String[] candidateJavaHomes = new String[] { + javaHome, + childEnv.get("JAVA_HOME"), + System.getenv("JAVA_HOME"), + System.getProperty("java.home") + }; + for (String javaHome : candidateJavaHomes) { + if (javaHome != null) { + cmd.add(join(File.separator, javaHome, "bin", "java")); + break; + } } // Load extra JAVA_OPTS from conf/java-opts, if it exists. From 3c0bb6bc45e64fd82052d7857f2a06c34f0c1793 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 19 Dec 2018 00:01:53 +0800 Subject: [PATCH 0160/1072] [SPARK-26384][SQL] Propagate SQL configs for CSV schema inferring ## What changes were proposed in this pull request? Currently, SQL configs are not propagated to executors during schema inference in the CSV datasource. For example, changing `spark.sql.legacy.timeParser.enabled` has no effect on inferring timestamp types. In this PR, I propose to fix the issue by wrapping the schema inference action in `SQLExecution.withSQLConfPropagated`. ## How was this patch tested? Added logging to `TimestampFormatter`: ```patch -object TimestampFormatter { +object TimestampFormatter extends Logging { def apply(format: String, timeZone: TimeZone, locale: Locale): TimestampFormatter = { if (SQLConf.get.legacyTimeParserEnabled) { + logError("LegacyFallbackTimestampFormatter is being used") new LegacyFallbackTimestampFormatter(format, timeZone, locale) } else { + logError("Iso8601TimestampFormatter is being used") new Iso8601TimestampFormatter(format, timeZone, locale) } } ``` and ran the commands in `spark-shell`: ```shell $ ./bin/spark-shell --conf spark.sql.legacy.timeParser.enabled=true ``` ```scala scala> Seq("2010|10|10").toDF.repartition(1).write.mode("overwrite").text("/tmp/foo") scala> spark.read.option("inferSchema", "true").option("header", "false").option("timestampFormat", "yyyy|MM|dd").csv("/tmp/foo").printSchema() 18/12/18 10:47:27 ERROR TimestampFormatter: LegacyFallbackTimestampFormatter is being used root |-- _c0: timestamp (nullable = true) ``` Closes #23345 from MaxGekk/csv-schema-infer-propagate-configs.
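Roughly, the mechanics are as follows (the snippet is a sketch, not code from this patch, and `SQLExecution` is an internal helper, so this is how it would look inside Spark's own sql module): code that runs in tasks and reads `SQLConf.get` only sees session-level SQL configs when the job is started inside `SQLExecution.withSQLConfPropagated`, which is why the CSV inference call is wrapped below.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.SQLExecution
import org.apache.spark.sql.internal.SQLConf

// Assumes an existing SparkSession named `spark`.
def legacyParserSeenByTasks(spark: SparkSession): Array[Boolean] = {
  val rdd = spark.sparkContext.parallelize(Seq("2010|10|10"))
  SQLExecution.withSQLConfPropagated(spark) {
    // Inside the wrapped block, tasks read the session's SQL configs through SQLConf.get,
    // so settings such as spark.sql.legacy.timeParser.enabled take effect on executors.
    rdd.map(_ => SQLConf.get.getConf(SQLConf.LEGACY_TIME_PARSER_ENABLED)).collect()
  }
}
```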
Authored-by: Maxim Gekk Signed-off-by: Hyukjin Kwon --- .../sql/execution/datasources/csv/CSVDataSource.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala index b46dfb94c133..375cec597166 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala @@ -35,6 +35,7 @@ import org.apache.spark.rdd.{BinaryFileRDD, RDD} import org.apache.spark.sql.{Dataset, Encoders, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.csv.{CSVHeaderChecker, CSVInferSchema, CSVOptions, UnivocityParser} +import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.text.TextFileFormat import org.apache.spark.sql.types.StructType @@ -135,7 +136,9 @@ object TextInputCSVDataSource extends CSVDataSource { val parser = new CsvParser(parsedOptions.asParserSettings) linesWithoutHeader.map(parser.parseLine) } - new CSVInferSchema(parsedOptions).infer(tokenRDD, header) + SQLExecution.withSQLConfPropagated(csv.sparkSession) { + new CSVInferSchema(parsedOptions).infer(tokenRDD, header) + } case _ => // If the first line could not be read, just return the empty schema. StructType(Nil) @@ -208,7 +211,9 @@ object MultiLineCSVDataSource extends CSVDataSource { encoding = parsedOptions.charset) } val sampled = CSVUtils.sample(tokenRDD, parsedOptions) - new CSVInferSchema(parsedOptions).infer(sampled, header) + SQLExecution.withSQLConfPropagated(sparkSession) { + new CSVInferSchema(parsedOptions).infer(sampled, header) + } case None => // If the first row could not be read, just return the empty schema. StructType(Nil) From befca983d2da4f7828aa7a7cd7345d17c4f291dd Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 18 Dec 2018 10:09:56 -0800 Subject: [PATCH 0161/1072] [SPARK-26382][CORE] prefix comparator should handle -0.0 ## What changes were proposed in this pull request? This is kind of a followup of https://github.com/apache/spark/pull/23239 The `UnsafeProject` will normalize special float/double values(NaN and -0.0), so the sorter doesn't have to handle it. However, for consistency and future-proof, this PR proposes to normalize `-0.0` in the prefix comparator, so that it's same with the normal ordering. Note that prefix comparator handles NaN as well. This is not a bug fix, but a safe guard. ## How was this patch tested? existing tests Closes #23334 from cloud-fan/sort. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../unsafe/sort/PrefixComparators.java | 2 ++ .../unsafe/sort/PrefixComparatorsSuite.scala | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java index 0910db22af00..bef1bdadb27a 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java @@ -69,6 +69,8 @@ public static final class DoublePrefixComparator { * details see http://stereopsis.com/radix.html. 
*/ public static long computePrefix(double value) { + // normalize -0.0 to 0.0, as they should be equal + value = value == -0.0 ? 0.0 : value; // Java's doubleToLongBits already canonicalizes all NaN values to the smallest possible // positive NaN, so there's nothing special we need to do for NaNs. long bits = Double.doubleToLongBits(value); diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala index 73546ef1b7a6..38cb37c52459 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala @@ -125,6 +125,7 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks { val nan2Prefix = PrefixComparators.DoublePrefixComparator.computePrefix(nan2) assert(nan1Prefix === nan2Prefix) val doubleMaxPrefix = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue) + // NaN is greater than the max double value. assert(PrefixComparators.DOUBLE.compare(nan1Prefix, doubleMaxPrefix) === 1) } @@ -134,22 +135,34 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks { assert(java.lang.Double.doubleToRawLongBits(negativeNan) < 0) val prefix = PrefixComparators.DoublePrefixComparator.computePrefix(negativeNan) val doubleMaxPrefix = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue) + // -NaN is greater than the max double value. assert(PrefixComparators.DOUBLE.compare(prefix, doubleMaxPrefix) === 1) } test("double prefix comparator handles other special values properly") { - val nullValue = 0L + // See `SortPrefix.nullValue` for how we deal with nulls for float/double type + val smallestNullPrefix = 0L + val largestNullPrefix = -1L val nan = PrefixComparators.DoublePrefixComparator.computePrefix(Double.NaN) val posInf = PrefixComparators.DoublePrefixComparator.computePrefix(Double.PositiveInfinity) val negInf = PrefixComparators.DoublePrefixComparator.computePrefix(Double.NegativeInfinity) val minValue = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MinValue) val maxValue = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue) val zero = PrefixComparators.DoublePrefixComparator.computePrefix(0.0) + val minusZero = PrefixComparators.DoublePrefixComparator.computePrefix(-0.0) + + // null is greater than everything including NaN, when we need to treat it as the largest value. + assert(PrefixComparators.DOUBLE.compare(largestNullPrefix, nan) === 1) + // NaN is greater than the positive infinity. assert(PrefixComparators.DOUBLE.compare(nan, posInf) === 1) assert(PrefixComparators.DOUBLE.compare(posInf, maxValue) === 1) assert(PrefixComparators.DOUBLE.compare(maxValue, zero) === 1) assert(PrefixComparators.DOUBLE.compare(zero, minValue) === 1) assert(PrefixComparators.DOUBLE.compare(minValue, negInf) === 1) - assert(PrefixComparators.DOUBLE.compare(negInf, nullValue) === 1) + // null is smaller than everything including negative infinity, when we need to treat it as + // the smallest value. + assert(PrefixComparators.DOUBLE.compare(negInf, smallestNullPrefix) === 1) + // 0.0 should be equal to -0.0. 
+ assert(PrefixComparators.DOUBLE.compare(zero, minusZero) === 0) } } From 428eb2ad0ad8a141427120b13de3287962258c2d Mon Sep 17 00:00:00 2001 From: Jackey Lee Date: Tue, 18 Dec 2018 12:15:36 -0600 Subject: [PATCH 0162/1072] [SPARK-26394][CORE] Fix annotation error for Utils.timeStringAsMs ## What changes were proposed in this pull request? Change microseconds to milliseconds in the annotation of Utils.timeStringAsMs. Closes #23346 from stczwd/stczwd. Authored-by: Jackey Lee Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/util/Utils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index b4ea1ee95021..143abd3bbea8 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1037,7 +1037,7 @@ private[spark] object Utils extends Logging { } /** - * Convert a time parameter such as (50s, 100ms, or 250us) to microseconds for internal use. If + * Convert a time parameter such as (50s, 100ms, or 250us) to milliseconds for internal use. If * no suffix is provided, the passed number is assumed to be in ms. */ def timeStringAsMs(str: String): Long = { From 4b3fe3a9ccc8a4a8eb0d037d19cb07a8a288e37a Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Tue, 18 Dec 2018 13:30:09 -0800 Subject: [PATCH 0163/1072] [SPARK-25815][K8S] Support kerberos in client mode, keytab-based token renewal. This change hooks up the k8s backend to the updated HadoopDelegationTokenManager, so that delegation tokens are also available in client mode, and keytab-based token renewal is enabled. The change re-works the k8s feature steps related to kerberos so that the driver does all the credential management and provides all the needed information to executors - so nothing needs to be added to executor pods. This also makes cluster mode behave a lot more similarly to client mode, since no driver-related config steps are run in the latter case. The two main things that don't need to happen in executors anymore are: - adding the Hadoop config to the executor pods: this is not needed since the Spark driver will serialize the Hadoop config and send it to executors when running tasks. - mounting the kerberos config file in the executor pods: this is not needed once you remove the above. The Hadoop conf sent by the driver with the tasks is already resolved (i.e. has all the kerberos names properly defined), so executors do not need access to the kerberos realm information anymore. The change also avoids creating delegation tokens unnecessarily. This means that they'll only be created if a secret with tokens was not provided, and if a keytab is not provided. In either of those cases, the driver code will handle delegation tokens: in cluster mode by creating a secret and stashing them, in client mode by using existing mechanisms to send DTs to executors. One last feature: the change also allows defining a keytab with a "local:" URI. This is supported in client mode (although that's the same as not saying "local:"), and in k8s cluster mode. This allows the keytab to be mounted onto the image from a pre-existing secret, for example. Finally, the new code always sets SPARK_USER in the driver and executor pods. This is in line with how other resource managers behave: the submitting user reflects which user will access Hadoop services in the app. (With kerberos, that's overridden by the logged in user.)
That user is unrelated to the OS user the app is running as inside the containers. Tested: - client and cluster mode with kinit - cluster mode with keytab - cluster mode with local: keytab - YARN cluster with keytab (to make sure it isn't broken) Closes #22911 from vanzin/SPARK-25815. Authored-by: Marcelo Vanzin Signed-off-by: Marcelo Vanzin --- .../org/apache/spark/deploy/SparkSubmit.scala | 29 +- .../HadoopDelegationTokenManager.scala | 8 +- .../scala/org/apache/spark/util/Utils.scala | 8 + .../apache/spark/deploy/k8s/Constants.scala | 9 +- .../spark/deploy/k8s/KubernetesConf.scala | 4 - .../apache/spark/deploy/k8s/SparkPod.scala | 25 +- .../k8s/features/BasicDriverFeatureStep.scala | 4 + .../features/BasicExecutorFeatureStep.scala | 4 + .../HadoopConfDriverFeatureStep.scala | 124 +++++++ .../HadoopConfExecutorFeatureStep.scala | 40 --- .../HadoopSparkUserExecutorFeatureStep.scala | 35 -- .../KerberosConfDriverFeatureStep.scala | 315 ++++++++++-------- .../KerberosConfExecutorFeatureStep.scala | 46 --- .../hadooputils/HadoopBootstrapUtil.scala | 283 ---------------- .../hadooputils/KerberosConfigSpec.scala | 33 -- .../k8s/submit/KubernetesDriverBuilder.scala | 1 + .../KubernetesClusterSchedulerBackend.scala | 7 +- .../k8s/KubernetesExecutorBuilder.scala | 5 +- .../BasicDriverFeatureStepSuite.scala | 3 +- .../BasicExecutorFeatureStepSuite.scala | 9 +- .../HadoopConfDriverFeatureStepSuite.scala | 71 ++++ .../KerberosConfDriverFeatureStepSuite.scala | 171 ++++++++++ .../KubernetesFeaturesTestUtils.scala | 6 + .../org/apache/spark/deploy/yarn/Client.scala | 24 +- .../spark/deploy/yarn/ClientSuite.scala | 6 +- 25 files changed, 649 insertions(+), 621 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStep.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/HadoopBootstrapUtil.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/KerberosConfigSpec.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index d4055cb6c585..763bd0a70a03 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy import java.io._ import java.lang.reflect.{InvocationTargetException, Modifier, UndeclaredThrowableException} -import java.net.URL +import java.net.{URI, URL} import java.security.PrivilegedExceptionAction import java.text.ParseException import java.util.UUID @@ -334,19 +334,20 @@ private[spark] class SparkSubmit extends Logging { val hadoopConf = 
conf.getOrElse(SparkHadoopUtil.newConfiguration(sparkConf)) val targetDir = Utils.createTempDir() - // assure a keytab is available from any place in a JVM - if (clusterManager == YARN || clusterManager == LOCAL || isMesosClient || isKubernetesCluster) { - if (args.principal != null) { - if (args.keytab != null) { - require(new File(args.keytab).exists(), s"Keytab file: ${args.keytab} does not exist") - // Add keytab and principal configurations in sysProps to make them available - // for later use; e.g. in spark sql, the isolated class loader used to talk - // to HiveMetastore will use these settings. They will be set as Java system - // properties and then loaded by SparkConf - sparkConf.set(KEYTAB, args.keytab) - sparkConf.set(PRINCIPAL, args.principal) - UserGroupInformation.loginUserFromKeytab(args.principal, args.keytab) - } + // Kerberos is not supported in standalone mode, and keytab support is not yet available + // in Mesos cluster mode. + if (clusterManager != STANDALONE + && !isMesosCluster + && args.principal != null + && args.keytab != null) { + // If client mode, make sure the keytab is just a local path. + if (deployMode == CLIENT && Utils.isLocalUri(args.keytab)) { + args.keytab = new URI(args.keytab).getPath() + } + + if (!Utils.isLocalUri(args.keytab)) { + require(new File(args.keytab).exists(), s"Keytab file: ${args.keytab} does not exist") + UserGroupInformation.loginUserFromKeytab(args.principal, args.keytab) } } diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala index 126a6ab80136..f7e3ddecee09 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala @@ -18,6 +18,7 @@ package org.apache.spark.deploy.security import java.io.File +import java.net.URI import java.security.PrivilegedExceptionAction import java.util.concurrent.{ScheduledExecutorService, TimeUnit} import java.util.concurrent.atomic.AtomicReference @@ -71,11 +72,13 @@ private[spark] class HadoopDelegationTokenManager( private val providerEnabledConfig = "spark.security.credentials.%s.enabled" private val principal = sparkConf.get(PRINCIPAL).orNull - private val keytab = sparkConf.get(KEYTAB).orNull + + // The keytab can be a local: URI for cluster mode, so translate it to a regular path. If it is + // needed later on, the code will check that it exists. 
+ private val keytab = sparkConf.get(KEYTAB).map { uri => new URI(uri).getPath() }.orNull require((principal == null) == (keytab == null), "Both principal and keytab must be defined, or neither.") - require(keytab == null || new File(keytab).isFile(), s"Cannot find keytab at $keytab.") private val delegationTokenProviders = loadProviders() logDebug("Using the following builtin delegation token providers: " + @@ -264,6 +267,7 @@ private[spark] class HadoopDelegationTokenManager( private def doLogin(): UserGroupInformation = { logInfo(s"Attempting to login to KDC using principal: $principal") + require(new File(keytab).isFile(), s"Cannot find keytab at $keytab.") val ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab) logInfo("Successfully logged into KDC.") ugi diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 143abd3bbea8..f322e92c6c8c 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -92,6 +92,9 @@ private[spark] object Utils extends Logging { private val MAX_DIR_CREATION_ATTEMPTS: Int = 10 @volatile private var localRootDirs: Array[String] = null + /** Scheme used for files that are locally available on worker nodes in the cluster. */ + val LOCAL_SCHEME = "local" + /** Serialize an object using Java serialization */ def serialize[T](o: T): Array[Byte] = { val bos = new ByteArrayOutputStream() @@ -2829,6 +2832,11 @@ private[spark] object Utils extends Logging { def isClientMode(conf: SparkConf): Boolean = { "client".equals(conf.get(SparkLauncher.DEPLOY_MODE, "client")) } + + /** Returns whether the URI is a "local:" URI. */ + def isLocalUri(uri: String): Boolean = { + uri.startsWith(s"$LOCAL_SCHEME:") + } } private[util] object CallerContext extends Logging { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala index 85917b88e912..76041e7de518 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala @@ -87,25 +87,22 @@ private[spark] object Constants { val NON_JVM_MEMORY_OVERHEAD_FACTOR = 0.4d // Hadoop Configuration - val HADOOP_FILE_VOLUME = "hadoop-properties" + val HADOOP_CONF_VOLUME = "hadoop-properties" val KRB_FILE_VOLUME = "krb5-file" val HADOOP_CONF_DIR_PATH = "/opt/hadoop/conf" val KRB_FILE_DIR_PATH = "/etc" val ENV_HADOOP_CONF_DIR = "HADOOP_CONF_DIR" val HADOOP_CONFIG_MAP_NAME = "spark.kubernetes.executor.hadoopConfigMapName" - val KRB5_CONFIG_MAP_NAME = - "spark.kubernetes.executor.krb5ConfigMapName" // Kerberos Configuration - val KERBEROS_DELEGEGATION_TOKEN_SECRET_NAME = "delegation-tokens" val KERBEROS_DT_SECRET_NAME = "spark.kubernetes.kerberos.dt-secret-name" val KERBEROS_DT_SECRET_KEY = "spark.kubernetes.kerberos.dt-secret-key" - val KERBEROS_SPARK_USER_NAME = - "spark.kubernetes.kerberos.spark-user-name" val KERBEROS_SECRET_KEY = "hadoop-tokens" + val KERBEROS_KEYTAB_VOLUME = "kerberos-keytab" + val KERBEROS_KEYTAB_MOUNT_POINT = "/mnt/secrets/kerberos-keytab" // Hadoop credentials secrets for the Spark app. 
val SPARK_APP_HADOOP_CREDENTIALS_BASE_DIR = "/mnt/secrets/hadoop-credentials" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala index a06c21b47f15..6febad981af5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala @@ -42,10 +42,6 @@ private[spark] abstract class KubernetesConf(val sparkConf: SparkConf) { def appName: String = get("spark.app.name", "spark") - def hadoopConfigMapName: String = s"$resourceNamePrefix-hadoop-config" - - def krbConfigMapName: String = s"$resourceNamePrefix-krb5-file" - def namespace: String = get(KUBERNETES_NAMESPACE) def imagePullPolicy: String = get(CONTAINER_IMAGE_PULL_POLICY) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala index 345dd117fd35..fd1196368a7f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala @@ -18,7 +18,30 @@ package org.apache.spark.deploy.k8s import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder} -private[spark] case class SparkPod(pod: Pod, container: Container) +private[spark] case class SparkPod(pod: Pod, container: Container) { + + /** + * Convenience method to apply a series of chained transformations to a pod. + * + * Use it like: + * + * original.modify { case pod => + * // update pod and return new one + * }.modify { case pod => + * // more changes that create a new pod + * }.modify { + * case pod if someCondition => // new pod + * } + * + * This makes it cleaner to apply multiple transformations, avoiding having to create + * a bunch of awkwardly-named local variables. Since the argument is a partial function, + * it can do matching without needing to exhaust all the possibilities. If the function + * is not applied, then the original pod will be kept. 
+ */ + def transform(fn: PartialFunction[SparkPod, SparkPod]): SparkPod = fn.lift(this).getOrElse(this) + +} + private[spark] object SparkPod { def initialPod(): SparkPod = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index d8cf3653d322..8362c14fb289 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -110,6 +110,10 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) .withContainerPort(driverUIPort) .withProtocol("TCP") .endPort() + .addNewEnv() + .withName(ENV_SPARK_USER) + .withValue(Utils.getCurrentUserName()) + .endEnv() .addAllToEnv(driverCustomEnvs.asJava) .addNewEnv() .withName(ENV_DRIVER_BIND_ADDRESS) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 4bcf4c9446aa..c8bf7cdb4224 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -163,6 +163,10 @@ private[spark] class BasicExecutorFeatureStep( .addToLimits("memory", executorMemoryQuantity) .addToRequests("cpu", executorCpuQuantity) .endResources() + .addNewEnv() + .withName(ENV_SPARK_USER) + .withValue(Utils.getCurrentUserName()) + .endEnv() .addAllToEnv(executorEnv.asJava) .withPorts(requiredPorts.asJava) .addToArgs("executor") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStep.scala new file mode 100644 index 000000000000..d602ed5481e6 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStep.scala @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.features + +import java.io.File +import java.nio.charset.StandardCharsets + +import scala.collection.JavaConverters._ + +import com.google.common.io.Files +import io.fabric8.kubernetes.api.model._ + +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesUtils, SparkPod} +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ + +/** + * Mounts the Hadoop configuration - either a pre-defined config map, or a local configuration + * directory - on the driver pod. + */ +private[spark] class HadoopConfDriverFeatureStep(conf: KubernetesConf) + extends KubernetesFeatureConfigStep { + + private val confDir = Option(conf.sparkConf.getenv(ENV_HADOOP_CONF_DIR)) + private val existingConfMap = conf.get(KUBERNETES_HADOOP_CONF_CONFIG_MAP) + + KubernetesUtils.requireNandDefined( + confDir, + existingConfMap, + "Do not specify both the `HADOOP_CONF_DIR` in your ENV and the ConfigMap " + + "as the creation of an additional ConfigMap, when one is already specified is extraneous") + + private lazy val confFiles: Seq[File] = { + val dir = new File(confDir.get) + if (dir.isDirectory) { + dir.listFiles.filter(_.isFile).toSeq + } else { + Nil + } + } + + private def newConfigMapName: String = s"${conf.resourceNamePrefix}-hadoop-config" + + private def hasHadoopConf: Boolean = confDir.isDefined || existingConfMap.isDefined + + override def configurePod(original: SparkPod): SparkPod = { + original.transform { case pod if hasHadoopConf => + val confVolume = if (confDir.isDefined) { + val keyPaths = confFiles.map { file => + new KeyToPathBuilder() + .withKey(file.getName()) + .withPath(file.getName()) + .build() + } + new VolumeBuilder() + .withName(HADOOP_CONF_VOLUME) + .withNewConfigMap() + .withName(newConfigMapName) + .withItems(keyPaths.asJava) + .endConfigMap() + .build() + } else { + new VolumeBuilder() + .withName(HADOOP_CONF_VOLUME) + .withNewConfigMap() + .withName(existingConfMap.get) + .endConfigMap() + .build() + } + + val podWithConf = new PodBuilder(pod.pod) + .editSpec() + .addNewVolumeLike(confVolume) + .endVolume() + .endSpec() + .build() + + val containerWithMount = new ContainerBuilder(pod.container) + .addNewVolumeMount() + .withName(HADOOP_CONF_VOLUME) + .withMountPath(HADOOP_CONF_DIR_PATH) + .endVolumeMount() + .addNewEnv() + .withName(ENV_HADOOP_CONF_DIR) + .withValue(HADOOP_CONF_DIR_PATH) + .endEnv() + .build() + + SparkPod(podWithConf, containerWithMount) + } + } + + override def getAdditionalKubernetesResources(): Seq[HasMetadata] = { + if (confDir.isDefined) { + val fileMap = confFiles.map { file => + (file.getName(), Files.toString(file, StandardCharsets.UTF_8)) + }.toMap.asJava + + Seq(new ConfigMapBuilder() + .withNewMetadata() + .withName(newConfigMapName) + .endMetadata() + .addToData(fileMap) + .build()) + } else { + Nil + } + } + +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala deleted file mode 100644 index da332881ae1a..000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopConfExecutorFeatureStep.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.features - -import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, SparkPod} -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.features.hadooputils.HadoopBootstrapUtil -import org.apache.spark.internal.Logging - -/** - * This step is responsible for bootstraping the container with ConfigMaps - * containing Hadoop config files mounted as volumes and an ENV variable - * pointed to the mounted file directory. - */ -private[spark] class HadoopConfExecutorFeatureStep(conf: KubernetesExecutorConf) - extends KubernetesFeatureConfigStep with Logging { - - override def configurePod(pod: SparkPod): SparkPod = { - val hadoopConfDirCMapName = conf.getOption(HADOOP_CONFIG_MAP_NAME) - if (hadoopConfDirCMapName.isDefined) { - HadoopBootstrapUtil.bootstrapHadoopConfDir(None, None, hadoopConfDirCMapName, pod) - } else { - pod - } - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala deleted file mode 100644 index c038e75491ca..000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/HadoopSparkUserExecutorFeatureStep.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.k8s.features - -import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, SparkPod} -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.features.hadooputils.HadoopBootstrapUtil - -/** - * This step is responsible for setting ENV_SPARK_USER when HADOOP_FILES are detected - * however, this step would not be run if Kerberos is enabled, as Kerberos sets SPARK_USER - */ -private[spark] class HadoopSparkUserExecutorFeatureStep(conf: KubernetesExecutorConf) - extends KubernetesFeatureConfigStep { - - override def configurePod(pod: SparkPod): SparkPod = { - conf.getOption(KERBEROS_SPARK_USER_NAME).map { user => - HadoopBootstrapUtil.bootstrapSparkUserPod(user, pod) - }.getOrElse(pod) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala index c6d5a866fa7b..721d7e97b21f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala @@ -16,31 +16,40 @@ */ package org.apache.spark.deploy.k8s.features -import io.fabric8.kubernetes.api.model.{HasMetadata, Secret, SecretBuilder} +import java.io.File +import java.nio.charset.StandardCharsets + +import scala.collection.JavaConverters._ + +import com.google.common.io.Files +import io.fabric8.kubernetes.api.model._ import org.apache.commons.codec.binary.Base64 -import org.apache.hadoop.security.{Credentials, UserGroupInformation} +import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.k8s.{KubernetesDriverConf, KubernetesUtils, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.features.hadooputils._ import org.apache.spark.deploy.security.HadoopDelegationTokenManager +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ +import org.apache.spark.util.Utils /** - * Runs the necessary Hadoop-based logic based on Kerberos configs and the presence of the - * HADOOP_CONF_DIR. This runs various bootstrap methods defined in HadoopBootstrapUtil. + * Provide kerberos / service credentials to the Spark driver. + * + * There are three use cases, in order of precedence: + * + * - keytab: if a kerberos keytab is defined, it is provided to the driver, and the driver will + * manage the kerberos login and the creation of delegation tokens. + * - existing tokens: if a secret containing delegation tokens is provided, it will be mounted + * on the driver pod, and the driver will handle distribution of those tokens to executors. + * - tgt only: if Hadoop security is enabled, the local TGT will be used to create delegation + * tokens which will be provided to the driver. The driver will handle distribution of the + * tokens to executors. 
*/ private[spark] class KerberosConfDriverFeatureStep(kubernetesConf: KubernetesDriverConf) - extends KubernetesFeatureConfigStep { - - private val hadoopConfDir = Option(kubernetesConf.sparkConf.getenv(ENV_HADOOP_CONF_DIR)) - private val hadoopConfigMapName = kubernetesConf.get(KUBERNETES_HADOOP_CONF_CONFIG_MAP) - KubernetesUtils.requireNandDefined( - hadoopConfDir, - hadoopConfigMapName, - "Do not specify both the `HADOOP_CONF_DIR` in your ENV and the ConfigMap " + - "as the creation of an additional ConfigMap, when one is already specified is extraneous") + extends KubernetesFeatureConfigStep with Logging { private val principal = kubernetesConf.get(org.apache.spark.internal.config.PRINCIPAL) private val keytab = kubernetesConf.get(org.apache.spark.internal.config.KEYTAB) @@ -49,15 +58,6 @@ private[spark] class KerberosConfDriverFeatureStep(kubernetesConf: KubernetesDri private val krb5File = kubernetesConf.get(KUBERNETES_KERBEROS_KRB5_FILE) private val krb5CMap = kubernetesConf.get(KUBERNETES_KERBEROS_KRB5_CONFIG_MAP) private val hadoopConf = SparkHadoopUtil.get.newConfiguration(kubernetesConf.sparkConf) - private val tokenManager = new HadoopDelegationTokenManager(kubernetesConf.sparkConf, hadoopConf) - private val isKerberosEnabled = - (hadoopConfDir.isDefined && UserGroupInformation.isSecurityEnabled) || - (hadoopConfigMapName.isDefined && (krb5File.isDefined || krb5CMap.isDefined)) - require(keytab.isEmpty || isKerberosEnabled, - "You must enable Kerberos support if you are specifying a Kerberos Keytab") - - require(existingSecretName.isEmpty || isKerberosEnabled, - "You must enable Kerberos support if you are specifying a Kerberos Secret") KubernetesUtils.requireNandDefined( krb5File, @@ -79,128 +79,183 @@ private[spark] class KerberosConfDriverFeatureStep(kubernetesConf: KubernetesDri "If a secret storing a Kerberos Delegation Token is specified you must also" + " specify the item-key where the data is stored") - private val hadoopConfigurationFiles = hadoopConfDir.map { hConfDir => - HadoopBootstrapUtil.getHadoopConfFiles(hConfDir) + if (!hasKerberosConf) { + logInfo("You have not specified a krb5.conf file locally or via a ConfigMap. " + + "Make sure that you have the krb5.conf locally on the driver image.") } - private val newHadoopConfigMapName = - if (hadoopConfigMapName.isEmpty) { - Some(kubernetesConf.hadoopConfigMapName) - } else { - None - } - // Either use pre-existing secret or login to create new Secret with DT stored within - private val kerberosConfSpec: Option[KerberosConfigSpec] = (for { - secretName <- existingSecretName - secretItemKey <- existingSecretItemKey - } yield { - KerberosConfigSpec( - dtSecret = None, - dtSecretName = secretName, - dtSecretItemKey = secretItemKey, - jobUserName = UserGroupInformation.getCurrentUser.getShortUserName) - }).orElse( - if (isKerberosEnabled) { - Some(buildKerberosSpec()) + // Create delegation tokens if needed. This is a lazy val so that it's not populated + // unnecessarily. But it needs to be accessible to different methods in this class, + // since it's not clear based solely on available configuration options that delegation + // tokens are needed when other credentials are not available. 
+ private lazy val delegationTokens: Array[Byte] = { + if (keytab.isEmpty && existingSecretName.isEmpty) { + val tokenManager = new HadoopDelegationTokenManager(kubernetesConf.sparkConf, + SparkHadoopUtil.get.newConfiguration(kubernetesConf.sparkConf)) + val creds = UserGroupInformation.getCurrentUser().getCredentials() + tokenManager.obtainDelegationTokens(creds) + // If no tokens and no secrets are stored in the credentials, make sure nothing is returned, + // to avoid creating an unnecessary secret. + if (creds.numberOfTokens() > 0 || creds.numberOfSecretKeys() > 0) { + SparkHadoopUtil.get.serialize(creds) + } else { + null + } } else { - None + null } - ) + } - override def configurePod(pod: SparkPod): SparkPod = { - if (!isKerberosEnabled) { - return pod - } + private def needKeytabUpload: Boolean = keytab.exists(!Utils.isLocalUri(_)) - val hadoopBasedSparkPod = HadoopBootstrapUtil.bootstrapHadoopConfDir( - hadoopConfDir, - newHadoopConfigMapName, - hadoopConfigMapName, - pod) - kerberosConfSpec.map { hSpec => - HadoopBootstrapUtil.bootstrapKerberosPod( - hSpec.dtSecretName, - hSpec.dtSecretItemKey, - hSpec.jobUserName, - krb5File, - Some(kubernetesConf.krbConfigMapName), - krb5CMap, - hadoopBasedSparkPod) - }.getOrElse( - HadoopBootstrapUtil.bootstrapSparkUserPod( - UserGroupInformation.getCurrentUser.getShortUserName, - hadoopBasedSparkPod)) - } + private def dtSecretName: String = s"${kubernetesConf.resourceNamePrefix}-delegation-tokens" - override def getAdditionalPodSystemProperties(): Map[String, String] = { - if (!isKerberosEnabled) { - return Map.empty - } + private def ktSecretName: String = s"${kubernetesConf.resourceNamePrefix}-kerberos-keytab" - val resolvedConfValues = kerberosConfSpec.map { hSpec => - Map(KERBEROS_DT_SECRET_NAME -> hSpec.dtSecretName, - KERBEROS_DT_SECRET_KEY -> hSpec.dtSecretItemKey, - KERBEROS_SPARK_USER_NAME -> hSpec.jobUserName, - KRB5_CONFIG_MAP_NAME -> krb5CMap.getOrElse(kubernetesConf.krbConfigMapName)) - }.getOrElse( - Map(KERBEROS_SPARK_USER_NAME -> - UserGroupInformation.getCurrentUser.getShortUserName)) - Map(HADOOP_CONFIG_MAP_NAME -> - hadoopConfigMapName.getOrElse(kubernetesConf.hadoopConfigMapName)) ++ resolvedConfValues - } + private def hasKerberosConf: Boolean = krb5CMap.isDefined | krb5File.isDefined - override def getAdditionalKubernetesResources(): Seq[HasMetadata] = { - if (!isKerberosEnabled) { - return Seq.empty - } + private def newConfigMapName: String = s"${kubernetesConf.resourceNamePrefix}-krb5-file" - val hadoopConfConfigMap = for { - hName <- newHadoopConfigMapName - hFiles <- hadoopConfigurationFiles - } yield { - HadoopBootstrapUtil.buildHadoopConfigMap(hName, hFiles) - } + override def configurePod(original: SparkPod): SparkPod = { + original.transform { case pod if hasKerberosConf => + val configMapVolume = if (krb5CMap.isDefined) { + new VolumeBuilder() + .withName(KRB_FILE_VOLUME) + .withNewConfigMap() + .withName(krb5CMap.get) + .endConfigMap() + .build() + } else { + val krb5Conf = new File(krb5File.get) + new VolumeBuilder() + .withName(KRB_FILE_VOLUME) + .withNewConfigMap() + .withName(newConfigMapName) + .withItems(new KeyToPathBuilder() + .withKey(krb5Conf.getName()) + .withPath(krb5Conf.getName()) + .build()) + .endConfigMap() + .build() + } - val krb5ConfigMap = krb5File.map { fileLocation => - HadoopBootstrapUtil.buildkrb5ConfigMap( - kubernetesConf.krbConfigMapName, - fileLocation) - } + val podWithVolume = new PodBuilder(pod.pod) + .editSpec() + .addNewVolumeLike(configMapVolume) + .endVolume() + .endSpec() 
+ .build() + + val containerWithMount = new ContainerBuilder(pod.container) + .addNewVolumeMount() + .withName(KRB_FILE_VOLUME) + .withMountPath(KRB_FILE_DIR_PATH + "/krb5.conf") + .withSubPath("krb5.conf") + .endVolumeMount() + .build() + + SparkPod(podWithVolume, containerWithMount) + }.transform { + case pod if needKeytabUpload => + // If keytab is defined and is a submission-local file (not local: URI), then create a + // secret for it. The keytab data will be stored in this secret below. + val podWitKeytab = new PodBuilder(pod.pod) + .editOrNewSpec() + .addNewVolume() + .withName(KERBEROS_KEYTAB_VOLUME) + .withNewSecret() + .withSecretName(ktSecretName) + .endSecret() + .endVolume() + .endSpec() + .build() + + val containerWithKeytab = new ContainerBuilder(pod.container) + .addNewVolumeMount() + .withName(KERBEROS_KEYTAB_VOLUME) + .withMountPath(KERBEROS_KEYTAB_MOUNT_POINT) + .endVolumeMount() + .build() + + SparkPod(podWitKeytab, containerWithKeytab) + + case pod if existingSecretName.isDefined | delegationTokens != null => + val secretName = existingSecretName.getOrElse(dtSecretName) + val itemKey = existingSecretItemKey.getOrElse(KERBEROS_SECRET_KEY) + + val podWithTokens = new PodBuilder(pod.pod) + .editOrNewSpec() + .addNewVolume() + .withName(SPARK_APP_HADOOP_SECRET_VOLUME_NAME) + .withNewSecret() + .withSecretName(secretName) + .endSecret() + .endVolume() + .endSpec() + .build() - val kerberosDTSecret = kerberosConfSpec.flatMap(_.dtSecret) + val containerWithTokens = new ContainerBuilder(pod.container) + .addNewVolumeMount() + .withName(SPARK_APP_HADOOP_SECRET_VOLUME_NAME) + .withMountPath(SPARK_APP_HADOOP_CREDENTIALS_BASE_DIR) + .endVolumeMount() + .addNewEnv() + .withName(ENV_HADOOP_TOKEN_FILE_LOCATION) + .withValue(s"$SPARK_APP_HADOOP_CREDENTIALS_BASE_DIR/$itemKey") + .endEnv() + .build() - hadoopConfConfigMap.toSeq ++ - krb5ConfigMap.toSeq ++ - kerberosDTSecret.toSeq + SparkPod(podWithTokens, containerWithTokens) + } } - private def buildKerberosSpec(): KerberosConfigSpec = { - // The JobUserUGI will be taken fom the Local Ticket Cache or via keytab+principal - // The login happens in the SparkSubmit so login logic is not necessary to include - val jobUserUGI = UserGroupInformation.getCurrentUser - val creds = jobUserUGI.getCredentials - tokenManager.obtainDelegationTokens(creds) - val tokenData = SparkHadoopUtil.get.serialize(creds) - require(tokenData.nonEmpty, "Did not obtain any delegation tokens") - val newSecretName = - s"${kubernetesConf.resourceNamePrefix}-$KERBEROS_DELEGEGATION_TOKEN_SECRET_NAME" - val secretDT = - new SecretBuilder() - .withNewMetadata() - .withName(newSecretName) - .endMetadata() - .addToData(KERBEROS_SECRET_KEY, Base64.encodeBase64String(tokenData)) - .build() - KerberosConfigSpec( - dtSecret = Some(secretDT), - dtSecretName = newSecretName, - dtSecretItemKey = KERBEROS_SECRET_KEY, - jobUserName = jobUserUGI.getShortUserName) + override def getAdditionalPodSystemProperties(): Map[String, String] = { + // If a submission-local keytab is provided, update the Spark config so that it knows the + // path of the keytab in the driver container. 
+ if (needKeytabUpload) { + val ktName = new File(keytab.get).getName() + Map(KEYTAB.key -> s"$KERBEROS_KEYTAB_MOUNT_POINT/$ktName") + } else { + Map.empty + } } - private case class KerberosConfigSpec( - dtSecret: Option[Secret], - dtSecretName: String, - dtSecretItemKey: String, - jobUserName: String) + override def getAdditionalKubernetesResources(): Seq[HasMetadata] = { + Seq[HasMetadata]() ++ { + krb5File.map { path => + val file = new File(path) + new ConfigMapBuilder() + .withNewMetadata() + .withName(newConfigMapName) + .endMetadata() + .addToData( + Map(file.getName() -> Files.toString(file, StandardCharsets.UTF_8)).asJava) + .build() + } + } ++ { + // If a submission-local keytab is provided, stash it in a secret. + if (needKeytabUpload) { + val kt = new File(keytab.get) + Seq(new SecretBuilder() + .withNewMetadata() + .withName(ktSecretName) + .endMetadata() + .addToData(kt.getName(), Base64.encodeBase64String(Files.toByteArray(kt))) + .build()) + } else { + Nil + } + } ++ { + if (delegationTokens != null) { + Seq(new SecretBuilder() + .withNewMetadata() + .withName(dtSecretName) + .endMetadata() + .addToData(KERBEROS_SECRET_KEY, Base64.encodeBase64String(delegationTokens)) + .build()) + } else { + Nil + } + } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala deleted file mode 100644 index 907271b1cb48..000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfExecutorFeatureStep.scala +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.k8s.features - -import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, SparkPod} -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.features.hadooputils.HadoopBootstrapUtil -import org.apache.spark.internal.Logging - -/** - * This step is responsible for mounting the DT secret for the executors - */ -private[spark] class KerberosConfExecutorFeatureStep(conf: KubernetesExecutorConf) - extends KubernetesFeatureConfigStep with Logging { - - override def configurePod(pod: SparkPod): SparkPod = { - val maybeKrb5CMap = conf.getOption(KRB5_CONFIG_MAP_NAME) - if (maybeKrb5CMap.isDefined) { - logInfo(s"Mounting Resources for Kerberos") - HadoopBootstrapUtil.bootstrapKerberosPod( - conf.get(KERBEROS_DT_SECRET_NAME), - conf.get(KERBEROS_DT_SECRET_KEY), - conf.get(KERBEROS_SPARK_USER_NAME), - None, - None, - maybeKrb5CMap, - pod) - } else { - pod - } - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/HadoopBootstrapUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/HadoopBootstrapUtil.scala deleted file mode 100644 index 5bee766caf2b..000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/HadoopBootstrapUtil.scala +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.k8s.features.hadooputils - -import java.io.File -import java.nio.charset.StandardCharsets - -import scala.collection.JavaConverters._ - -import com.google.common.io.Files -import io.fabric8.kubernetes.api.model._ - -import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.SparkPod -import org.apache.spark.internal.Logging - -private[spark] object HadoopBootstrapUtil extends Logging { - - /** - * Mounting the DT secret for both the Driver and the executors - * - * @param dtSecretName Name of the secret that stores the Delegation Token - * @param dtSecretItemKey Name of the Item Key storing the Delegation Token - * @param userName Name of the SparkUser to set SPARK_USER - * @param fileLocation Optional Location of the krb5 file - * @param newKrb5ConfName Optional location of the ConfigMap for Krb5 - * @param existingKrb5ConfName Optional name of ConfigMap for Krb5 - * @param pod Input pod to be appended to - * @return a modified SparkPod - */ - def bootstrapKerberosPod( - dtSecretName: String, - dtSecretItemKey: String, - userName: String, - fileLocation: Option[String], - newKrb5ConfName: Option[String], - existingKrb5ConfName: Option[String], - pod: SparkPod): SparkPod = { - - val preConfigMapVolume = existingKrb5ConfName.map { kconf => - new VolumeBuilder() - .withName(KRB_FILE_VOLUME) - .withNewConfigMap() - .withName(kconf) - .endConfigMap() - .build() - } - - val createConfigMapVolume = for { - fLocation <- fileLocation - krb5ConfName <- newKrb5ConfName - } yield { - val krb5File = new File(fLocation) - val fileStringPath = krb5File.toPath.getFileName.toString - new VolumeBuilder() - .withName(KRB_FILE_VOLUME) - .withNewConfigMap() - .withName(krb5ConfName) - .withItems(new KeyToPathBuilder() - .withKey(fileStringPath) - .withPath(fileStringPath) - .build()) - .endConfigMap() - .build() - } - - // Breaking up Volume creation for clarity - val configMapVolume = preConfigMapVolume.orElse(createConfigMapVolume) - if (configMapVolume.isEmpty) { - logInfo("You have not specified a krb5.conf file locally or via a ConfigMap. 
" + - "Make sure that you have the krb5.conf locally on the Driver and Executor images") - } - - val kerberizedPodWithDTSecret = new PodBuilder(pod.pod) - .editOrNewSpec() - .addNewVolume() - .withName(SPARK_APP_HADOOP_SECRET_VOLUME_NAME) - .withNewSecret() - .withSecretName(dtSecretName) - .endSecret() - .endVolume() - .endSpec() - .build() - - // Optionally add the krb5.conf ConfigMap - val kerberizedPod = configMapVolume.map { cmVolume => - new PodBuilder(kerberizedPodWithDTSecret) - .editSpec() - .addNewVolumeLike(cmVolume) - .endVolume() - .endSpec() - .build() - }.getOrElse(kerberizedPodWithDTSecret) - - val kerberizedContainerWithMounts = new ContainerBuilder(pod.container) - .addNewVolumeMount() - .withName(SPARK_APP_HADOOP_SECRET_VOLUME_NAME) - .withMountPath(SPARK_APP_HADOOP_CREDENTIALS_BASE_DIR) - .endVolumeMount() - .addNewEnv() - .withName(ENV_HADOOP_TOKEN_FILE_LOCATION) - .withValue(s"$SPARK_APP_HADOOP_CREDENTIALS_BASE_DIR/$dtSecretItemKey") - .endEnv() - .addNewEnv() - .withName(ENV_SPARK_USER) - .withValue(userName) - .endEnv() - .build() - - // Optionally add the krb5.conf Volume Mount - val kerberizedContainer = - if (configMapVolume.isDefined) { - new ContainerBuilder(kerberizedContainerWithMounts) - .addNewVolumeMount() - .withName(KRB_FILE_VOLUME) - .withMountPath(KRB_FILE_DIR_PATH + "/krb5.conf") - .withSubPath("krb5.conf") - .endVolumeMount() - .build() - } else { - kerberizedContainerWithMounts - } - - SparkPod(kerberizedPod, kerberizedContainer) - } - - /** - * setting ENV_SPARK_USER when HADOOP_FILES are detected - * - * @param sparkUserName Name of the SPARK_USER - * @param pod Input pod to be appended to - * @return a modified SparkPod - */ - def bootstrapSparkUserPod(sparkUserName: String, pod: SparkPod): SparkPod = { - val envModifiedContainer = new ContainerBuilder(pod.container) - .addNewEnv() - .withName(ENV_SPARK_USER) - .withValue(sparkUserName) - .endEnv() - .build() - SparkPod(pod.pod, envModifiedContainer) - } - - /** - * Grabbing files in the HADOOP_CONF_DIR - * - * @param path location of HADOOP_CONF_DIR - * @return a list of File object - */ - def getHadoopConfFiles(path: String): Seq[File] = { - val dir = new File(path) - if (dir.isDirectory) { - dir.listFiles.filter(_.isFile).toSeq - } else { - Seq.empty[File] - } - } - - /** - * Bootstraping the container with ConfigMaps that store - * Hadoop configuration files - * - * @param hadoopConfDir directory location of HADOOP_CONF_DIR env - * @param newHadoopConfigMapName name of the new configMap for HADOOP_CONF_DIR - * @param existingHadoopConfigMapName name of the pre-defined configMap for HADOOP_CONF_DIR - * @param pod Input pod to be appended to - * @return a modified SparkPod - */ - def bootstrapHadoopConfDir( - hadoopConfDir: Option[String], - newHadoopConfigMapName: Option[String], - existingHadoopConfigMapName: Option[String], - pod: SparkPod): SparkPod = { - val preConfigMapVolume = existingHadoopConfigMapName.map { hConf => - new VolumeBuilder() - .withName(HADOOP_FILE_VOLUME) - .withNewConfigMap() - .withName(hConf) - .endConfigMap() - .build() } - - val createConfigMapVolume = for { - dirLocation <- hadoopConfDir - hConfName <- newHadoopConfigMapName - } yield { - val hadoopConfigFiles = getHadoopConfFiles(dirLocation) - val keyPaths = hadoopConfigFiles.map { file => - val fileStringPath = file.toPath.getFileName.toString - new KeyToPathBuilder() - .withKey(fileStringPath) - .withPath(fileStringPath) - .build() - } - new VolumeBuilder() - .withName(HADOOP_FILE_VOLUME) - .withNewConfigMap() 
- .withName(hConfName) - .withItems(keyPaths.asJava) - .endConfigMap() - .build() - } - - // Breaking up Volume Creation for clarity - val configMapVolume = preConfigMapVolume.getOrElse(createConfigMapVolume.get) - - val hadoopSupportedPod = new PodBuilder(pod.pod) - .editSpec() - .addNewVolumeLike(configMapVolume) - .endVolume() - .endSpec() - .build() - - val hadoopSupportedContainer = new ContainerBuilder(pod.container) - .addNewVolumeMount() - .withName(HADOOP_FILE_VOLUME) - .withMountPath(HADOOP_CONF_DIR_PATH) - .endVolumeMount() - .addNewEnv() - .withName(ENV_HADOOP_CONF_DIR) - .withValue(HADOOP_CONF_DIR_PATH) - .endEnv() - .build() - SparkPod(hadoopSupportedPod, hadoopSupportedContainer) - } - - /** - * Builds ConfigMap given the file location of the - * krb5.conf file - * - * @param configMapName name of configMap for krb5 - * @param fileLocation location of krb5 file - * @return a ConfigMap - */ - def buildkrb5ConfigMap( - configMapName: String, - fileLocation: String): ConfigMap = { - val file = new File(fileLocation) - new ConfigMapBuilder() - .withNewMetadata() - .withName(configMapName) - .endMetadata() - .addToData(Map(file.toPath.getFileName.toString -> - Files.toString(file, StandardCharsets.UTF_8)).asJava) - .build() - } - - /** - * Builds ConfigMap given the ConfigMap name - * and a list of Hadoop Conf files - * - * @param hadoopConfigMapName name of hadoopConfigMap - * @param hadoopConfFiles list of hadoopFiles - * @return a ConfigMap - */ - def buildHadoopConfigMap( - hadoopConfigMapName: String, - hadoopConfFiles: Seq[File]): ConfigMap = { - new ConfigMapBuilder() - .withNewMetadata() - .withName(hadoopConfigMapName) - .endMetadata() - .addToData(hadoopConfFiles.map { file => - (file.toPath.getFileName.toString, - Files.toString(file, StandardCharsets.UTF_8)) - }.toMap.asJava) - .build() - } - -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/KerberosConfigSpec.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/KerberosConfigSpec.scala deleted file mode 100644 index 7f7ef216cf48..000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/hadooputils/KerberosConfigSpec.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.features.hadooputils - -import io.fabric8.kubernetes.api.model.Secret - -/** - * Represents a given configuration of the Kerberos Configuration logic - *
    - * - The secret containing a DT, either previously specified or built on the fly - * - The name of the secret where the DT will be stored - * - The data item-key on the secret which correlates with where the current DT data is stored - * - The Job User's username - */ -private[spark] case class KerberosConfigSpec( - dtSecret: Option[Secret], - dtSecretName: String, - dtSecretItemKey: String, - jobUserName: String) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala index d2c0ced9fa2f..57e4060bc85b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala @@ -46,6 +46,7 @@ private[spark] class KubernetesDriverBuilder { new LocalDirsFeatureStep(conf), new MountVolumesFeatureStep(conf), new DriverCommandFeatureStep(conf), + new HadoopConfDriverFeatureStep(conf), new KerberosConfDriverFeatureStep(conf), new PodTemplateConfigMapStep(conf)) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index 03f5da2bb0bc..cd298971e02a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -25,6 +25,7 @@ import io.fabric8.kubernetes.client.KubernetesClient import org.apache.spark.SparkContext import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.rpc.{RpcAddress, RpcEnv} import org.apache.spark.scheduler.{ExecutorLossReason, TaskSchedulerImpl} import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, SchedulerBackendUtils} @@ -143,7 +144,11 @@ private[spark] class KubernetesClusterSchedulerBackend( } override def createDriverEndpoint(properties: Seq[(String, String)]): DriverEndpoint = { - new KubernetesDriverEndpoint(rpcEnv, properties) + new KubernetesDriverEndpoint(sc.env.rpcEnv, properties) + } + + override protected def createTokenManager(): Option[HadoopDelegationTokenManager] = { + Some(new HadoopDelegationTokenManager(conf, sc.hadoopConfiguration)) } private class KubernetesDriverEndpoint(rpcEnv: RpcEnv, sparkProperties: Seq[(String, String)]) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala index 0b74966fe868..48aa2c56d4d6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala @@ -44,10 +44,7 @@ private[spark] class KubernetesExecutorBuilder { new MountSecretsFeatureStep(conf), new EnvSecretsFeatureStep(conf), new LocalDirsFeatureStep(conf), 
- new MountVolumesFeatureStep(conf), - new HadoopConfExecutorFeatureStep(conf), - new KerberosConfExecutorFeatureStep(conf), - new HadoopSparkUserExecutorFeatureStep(conf)) + new MountVolumesFeatureStep(conf)) features.foldLeft(initialPod) { case (pod, feature) => feature.configurePod(pod) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index e4951bc1e69e..5ceb9d6d6fcd 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit._ import org.apache.spark.internal.config._ import org.apache.spark.ui.SparkUI +import org.apache.spark.util.Utils class BasicDriverFeatureStepSuite extends SparkFunSuite { @@ -73,7 +74,6 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { val foundPortNames = configuredPod.container.getPorts.asScala.toSet assert(expectedPortNames === foundPortNames) - assert(configuredPod.container.getEnv.size === 3) val envs = configuredPod.container .getEnv .asScala @@ -82,6 +82,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { DRIVER_ENVS.foreach { case (k, v) => assert(envs(v) === v) } + assert(envs(ENV_SPARK_USER) === Utils.getCurrentUserName()) assert(configuredPod.pod.getSpec().getImagePullSecrets.asScala === TEST_IMAGE_PULL_SECRET_OBJECTS) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index 05989d9be7ad..c2efab01e424 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -200,7 +200,8 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { ENV_EXECUTOR_MEMORY -> "1g", ENV_APPLICATION_ID -> KubernetesTestConf.APP_ID, ENV_SPARK_CONF_DIR -> SPARK_CONF_DIR_INTERNAL, - ENV_EXECUTOR_POD_IP -> null) ++ additionalEnvVars + ENV_EXECUTOR_POD_IP -> null, + ENV_SPARK_USER -> Utils.getCurrentUserName()) val extraJavaOptsStart = additionalEnvVars.keys.count(_.startsWith(ENV_JAVA_OPT_PREFIX)) val extraJavaOpts = Utils.sparkJavaOpts(conf, SparkConf.isExecutorStartupConf) @@ -208,9 +209,11 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { s"$ENV_JAVA_OPT_PREFIX${ind + extraJavaOptsStart}" -> opt }.toMap - val mapEnvs = executorPod.container.getEnv.asScala.map { + val containerEnvs = executorPod.container.getEnv.asScala.map { x => (x.getName, x.getValue) }.toMap - assert((defaultEnvs ++ extraJavaOptsEnvs) === mapEnvs) + + val expectedEnvs = defaultEnvs ++ additionalEnvVars ++ extraJavaOptsEnvs + assert(containerEnvs === expectedEnvs) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala new file mode 100644 index 000000000000..e1c01dbdc735 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.features + +import java.io.File +import java.nio.charset.StandardCharsets.UTF_8 + +import scala.collection.JavaConverters._ + +import com.google.common.io.Files +import io.fabric8.kubernetes.api.model.ConfigMap + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s._ +import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.deploy.k8s.submit.JavaMainAppResource +import org.apache.spark.util.{SparkConfWithEnv, Utils} + +class HadoopConfDriverFeatureStepSuite extends SparkFunSuite { + + import KubernetesFeaturesTestUtils._ + import SecretVolumeUtils._ + + test("mount hadoop config map if defined") { + val sparkConf = new SparkConf(false) + .set(Config.KUBERNETES_HADOOP_CONF_CONFIG_MAP, "testConfigMap") + val conf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) + val step = new HadoopConfDriverFeatureStep(conf) + checkPod(step.configurePod(SparkPod.initialPod())) + assert(step.getAdditionalKubernetesResources().isEmpty) + } + + test("create hadoop config map if config dir is defined") { + val confDir = Utils.createTempDir() + val confFiles = Set("core-site.xml", "hdfs-site.xml") + + confFiles.foreach { f => + Files.write("some data", new File(confDir, f), UTF_8) + } + + val sparkConf = new SparkConfWithEnv(Map(ENV_HADOOP_CONF_DIR -> confDir.getAbsolutePath())) + val conf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) + + val step = new HadoopConfDriverFeatureStep(conf) + checkPod(step.configurePod(SparkPod.initialPod())) + + val hadoopConfMap = filter[ConfigMap](step.getAdditionalKubernetesResources()).head + assert(hadoopConfMap.getData().keySet().asScala === confFiles) + } + + private def checkPod(pod: SparkPod): Unit = { + assert(podHasVolume(pod.pod, HADOOP_CONF_VOLUME)) + assert(containerHasVolume(pod.container, HADOOP_CONF_VOLUME, HADOOP_CONF_DIR_PATH)) + assert(containerHasEnvVar(pod.container, ENV_HADOOP_CONF_DIR)) + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala new file mode 100644 index 000000000000..41ca3a94ce7a --- /dev/null +++ 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.features + +import java.io.File +import java.nio.charset.StandardCharsets.UTF_8 +import java.security.PrivilegedExceptionAction + +import scala.collection.JavaConverters._ + +import com.google.common.io.Files +import io.fabric8.kubernetes.api.model.{ConfigMap, Secret} +import org.apache.commons.codec.binary.Base64 +import org.apache.hadoop.io.Text +import org.apache.hadoop.security.{Credentials, UserGroupInformation} + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.k8s._ +import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.deploy.k8s.submit.JavaMainAppResource +import org.apache.spark.internal.config._ +import org.apache.spark.util.Utils + +class KerberosConfDriverFeatureStepSuite extends SparkFunSuite { + + import KubernetesFeaturesTestUtils._ + import SecretVolumeUtils._ + + private val tmpDir = Utils.createTempDir() + + test("mount krb5 config map if defined") { + val configMap = "testConfigMap" + val step = createStep( + new SparkConf(false).set(KUBERNETES_KERBEROS_KRB5_CONFIG_MAP, configMap)) + + checkPodForKrbConf(step.configurePod(SparkPod.initialPod()), configMap) + assert(step.getAdditionalPodSystemProperties().isEmpty) + assert(filter[ConfigMap](step.getAdditionalKubernetesResources()).isEmpty) + } + + test("create krb5.conf config map if local config provided") { + val krbConf = File.createTempFile("krb5", ".conf", tmpDir) + Files.write("some data", krbConf, UTF_8) + + val sparkConf = new SparkConf(false) + .set(KUBERNETES_KERBEROS_KRB5_FILE, krbConf.getAbsolutePath()) + val step = createStep(sparkConf) + + val confMap = filter[ConfigMap](step.getAdditionalKubernetesResources()).head + assert(confMap.getData().keySet().asScala === Set(krbConf.getName())) + + checkPodForKrbConf(step.configurePod(SparkPod.initialPod()), confMap.getMetadata().getName()) + assert(step.getAdditionalPodSystemProperties().isEmpty) + } + + test("create keytab secret if client keytab file used") { + val keytab = File.createTempFile("keytab", ".bin", tmpDir) + Files.write("some data", keytab, UTF_8) + + val sparkConf = new SparkConf(false) + .set(KEYTAB, keytab.getAbsolutePath()) + .set(PRINCIPAL, "alice") + val step = createStep(sparkConf) + + val pod = step.configurePod(SparkPod.initialPod()) + assert(podHasVolume(pod.pod, KERBEROS_KEYTAB_VOLUME)) + assert(containerHasVolume(pod.container, KERBEROS_KEYTAB_VOLUME, KERBEROS_KEYTAB_MOUNT_POINT)) + + 
assert(step.getAdditionalPodSystemProperties().keys === Set(KEYTAB.key)) + + val secret = filter[Secret](step.getAdditionalKubernetesResources()).head + assert(secret.getData().keySet().asScala === Set(keytab.getName())) + } + + test("do nothing if container-local keytab used") { + val sparkConf = new SparkConf(false) + .set(KEYTAB, "local:/my.keytab") + .set(PRINCIPAL, "alice") + val step = createStep(sparkConf) + + val initial = SparkPod.initialPod() + assert(step.configurePod(initial) === initial) + assert(step.getAdditionalPodSystemProperties().isEmpty) + assert(step.getAdditionalKubernetesResources().isEmpty) + } + + test("mount delegation tokens if provided") { + val dtSecret = "tokenSecret" + val sparkConf = new SparkConf(false) + .set(KUBERNETES_KERBEROS_DT_SECRET_NAME, dtSecret) + .set(KUBERNETES_KERBEROS_DT_SECRET_ITEM_KEY, "dtokens") + val step = createStep(sparkConf) + + checkPodForTokens(step.configurePod(SparkPod.initialPod()), dtSecret) + assert(step.getAdditionalPodSystemProperties().isEmpty) + assert(step.getAdditionalKubernetesResources().isEmpty) + } + + test("create delegation tokens if needed") { + // Since HadoopDelegationTokenManager does not create any tokens without proper configs and + // services, start with a test user that already has some tokens that will just be piped + // through to the driver. + val testUser = UserGroupInformation.createUserForTesting("k8s", Array()) + testUser.doAs(new PrivilegedExceptionAction[Unit]() { + override def run(): Unit = { + val creds = testUser.getCredentials() + creds.addSecretKey(new Text("K8S_TEST_KEY"), Array[Byte](0x4, 0x2)) + testUser.addCredentials(creds) + + val tokens = SparkHadoopUtil.get.serialize(creds) + + val step = createStep(new SparkConf(false)) + + val dtSecret = filter[Secret](step.getAdditionalKubernetesResources()).head + assert(dtSecret.getData().get(KERBEROS_SECRET_KEY) === Base64.encodeBase64String(tokens)) + + checkPodForTokens(step.configurePod(SparkPod.initialPod()), + dtSecret.getMetadata().getName()) + + assert(step.getAdditionalPodSystemProperties().isEmpty) + } + }) + } + + test("do nothing if no config and no tokens") { + val step = createStep(new SparkConf(false)) + val initial = SparkPod.initialPod() + assert(step.configurePod(initial) === initial) + assert(step.getAdditionalPodSystemProperties().isEmpty) + assert(step.getAdditionalKubernetesResources().isEmpty) + } + + private def checkPodForKrbConf(pod: SparkPod, confMapName: String): Unit = { + val podVolume = pod.pod.getSpec().getVolumes().asScala.find(_.getName() == KRB_FILE_VOLUME) + assert(podVolume.isDefined) + assert(containerHasVolume(pod.container, KRB_FILE_VOLUME, KRB_FILE_DIR_PATH + "/krb5.conf")) + assert(podVolume.get.getConfigMap().getName() === confMapName) + } + + private def checkPodForTokens(pod: SparkPod, dtSecretName: String): Unit = { + val podVolume = pod.pod.getSpec().getVolumes().asScala + .find(_.getName() == SPARK_APP_HADOOP_SECRET_VOLUME_NAME) + assert(podVolume.isDefined) + assert(containerHasVolume(pod.container, SPARK_APP_HADOOP_SECRET_VOLUME_NAME, + SPARK_APP_HADOOP_CREDENTIALS_BASE_DIR)) + assert(containerHasEnvVar(pod.container, ENV_HADOOP_TOKEN_FILE_LOCATION)) + assert(podVolume.get.getSecret().getSecretName() === dtSecretName) + } + + private def createStep(conf: SparkConf): KerberosConfDriverFeatureStep = { + val kconf = KubernetesTestConf.createDriverConf(sparkConf = conf) + new KerberosConfDriverFeatureStep(kconf) + } + +} diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala index f90380e30e52..076b681be239 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala @@ -17,6 +17,7 @@ package org.apache.spark.deploy.k8s.features import scala.collection.JavaConverters._ +import scala.reflect.ClassTag import io.fabric8.kubernetes.api.model.{Container, HasMetadata, PodBuilder, SecretBuilder} import org.mockito.Matchers @@ -63,4 +64,9 @@ object KubernetesFeaturesTestUtils { def containerHasEnvVar(container: Container, envVarName: String): Boolean = { container.getEnv.asScala.exists(envVar => envVar.getName == envVarName) } + + def filter[T: ClassTag](list: Seq[HasMetadata]): Seq[T] = { + val desired = implicitly[ClassTag[T]].runtimeClass + list.filter(_.getClass() == desired).map(_.asInstanceOf[T]).toSeq + } } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 6240f7b68d2c..184fb6a8ad13 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -116,6 +116,8 @@ private[spark] class Client( } } + require(keytab == null || !Utils.isLocalUri(keytab), "Keytab should reference a local file.") + private val launcherBackend = new LauncherBackend() { override protected def conf: SparkConf = sparkConf @@ -472,7 +474,7 @@ private[spark] class Client( appMasterOnly: Boolean = false): (Boolean, String) = { val trimmedPath = path.trim() val localURI = Utils.resolveURI(trimmedPath) - if (localURI.getScheme != LOCAL_SCHEME) { + if (localURI.getScheme != Utils.LOCAL_SCHEME) { if (addDistributedUri(localURI)) { val localPath = getQualifiedLocalPath(localURI, hadoopConf) val linkname = targetDir.map(_ + "/").getOrElse("") + @@ -515,7 +517,7 @@ private[spark] class Client( val sparkArchive = sparkConf.get(SPARK_ARCHIVE) if (sparkArchive.isDefined) { val archive = sparkArchive.get - require(!isLocalUri(archive), s"${SPARK_ARCHIVE.key} cannot be a local URI.") + require(!Utils.isLocalUri(archive), s"${SPARK_ARCHIVE.key} cannot be a local URI.") distribute(Utils.resolveURI(archive).toString, resType = LocalResourceType.ARCHIVE, destName = Some(LOCALIZED_LIB_DIR)) @@ -525,7 +527,7 @@ private[spark] class Client( // Break the list of jars to upload, and resolve globs. 
val localJars = new ArrayBuffer[String]() jars.foreach { jar => - if (!isLocalUri(jar)) { + if (!Utils.isLocalUri(jar)) { val path = getQualifiedLocalPath(Utils.resolveURI(jar), hadoopConf) val pathFs = FileSystem.get(path.toUri(), hadoopConf) pathFs.globStatus(path).filter(_.isFile()).foreach { entry => @@ -814,7 +816,7 @@ private[spark] class Client( } (pySparkArchives ++ pyArchives).foreach { path => val uri = Utils.resolveURI(path) - if (uri.getScheme != LOCAL_SCHEME) { + if (uri.getScheme != Utils.LOCAL_SCHEME) { pythonPath += buildPath(Environment.PWD.$$(), new Path(uri).getName()) } else { pythonPath += uri.getPath() @@ -1183,9 +1185,6 @@ private object Client extends Logging { // Alias for the user jar val APP_JAR_NAME: String = "__app__.jar" - // URI scheme that identifies local resources - val LOCAL_SCHEME = "local" - // Staging directory for any temporary jars or files val SPARK_STAGING: String = ".sparkStaging" @@ -1307,7 +1306,7 @@ private object Client extends Logging { addClasspathEntry(buildPath(Environment.PWD.$$(), LOCALIZED_LIB_DIR, "*"), env) if (sparkConf.get(SPARK_ARCHIVE).isEmpty) { sparkConf.get(SPARK_JARS).foreach { jars => - jars.filter(isLocalUri).foreach { jar => + jars.filter(Utils.isLocalUri).foreach { jar => val uri = new URI(jar) addClasspathEntry(getClusterPath(sparkConf, uri.getPath()), env) } @@ -1340,7 +1339,7 @@ private object Client extends Logging { private def getMainJarUri(mainJar: Option[String]): Option[URI] = { mainJar.flatMap { path => val uri = Utils.resolveURI(path) - if (uri.getScheme == LOCAL_SCHEME) Some(uri) else None + if (uri.getScheme == Utils.LOCAL_SCHEME) Some(uri) else None }.orElse(Some(new URI(APP_JAR_NAME))) } @@ -1368,7 +1367,7 @@ private object Client extends Logging { uri: URI, fileName: String, env: HashMap[String, String]): Unit = { - if (uri != null && uri.getScheme == LOCAL_SCHEME) { + if (uri != null && uri.getScheme == Utils.LOCAL_SCHEME) { addClasspathEntry(getClusterPath(conf, uri.getPath), env) } else if (fileName != null) { addClasspathEntry(buildPath(Environment.PWD.$$(), fileName), env) @@ -1489,11 +1488,6 @@ private object Client extends Logging { components.mkString(Path.SEPARATOR) } - /** Returns whether the URI is a "local:" URI. 
*/ - def isLocalUri(uri: String): Boolean = { - uri.startsWith(s"$LOCAL_SCHEME:") - } - def createAppReport(report: ApplicationReport): YarnAppReport = { val diags = report.getDiagnostics() val diagsOpt = if (diags != null && diags.nonEmpty) Some(diags) else None diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index b3286e8fd824..a6f57fcdb246 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -100,7 +100,7 @@ class ClientSuite extends SparkFunSuite with Matchers { val cp = env("CLASSPATH").split(":|;|") s"$SPARK,$USER,$ADDED".split(",").foreach({ entry => val uri = new URI(entry) - if (LOCAL_SCHEME.equals(uri.getScheme())) { + if (Utils.LOCAL_SCHEME.equals(uri.getScheme())) { cp should contain (uri.getPath()) } else { cp should not contain (uri.getPath()) @@ -136,7 +136,7 @@ class ClientSuite extends SparkFunSuite with Matchers { val expected = ADDED.split(",") .map(p => { val uri = new URI(p) - if (LOCAL_SCHEME == uri.getScheme()) { + if (Utils.LOCAL_SCHEME == uri.getScheme()) { p } else { Option(uri.getFragment()).getOrElse(new File(p).getName()) @@ -249,7 +249,7 @@ class ClientSuite extends SparkFunSuite with Matchers { any(classOf[MutableHashMap[URI, Path]]), anyBoolean(), any()) classpath(client) should contain (buildPath(PWD, LOCALIZED_LIB_DIR, "*")) - sparkConf.set(SPARK_ARCHIVE, LOCAL_SCHEME + ":" + archive.getPath()) + sparkConf.set(SPARK_ARCHIVE, Utils.LOCAL_SCHEME + ":" + archive.getPath()) intercept[IllegalArgumentException] { client.prepareLocalResources(new Path(temp.getAbsolutePath()), Nil) } From 834b8609793525a5a486013732d8c98e1c6e6504 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Tue, 18 Dec 2018 23:21:52 -0800 Subject: [PATCH 0164/1072] [SPARK-26366][SQL] ReplaceExceptWithFilter should consider NULL as False ## What changes were proposed in this pull request? In `ReplaceExceptWithFilter` we do not consider properly the case in which the condition returns NULL. Indeed, in that case, since negating NULL still returns NULL, so it is not true the assumption that negating the condition returns all the rows which didn't satisfy it, rows returning NULL may not be returned. This happens when constraints inferred by `InferFiltersFromConstraints` are not enough, as it happens with `OR` conditions. The rule had also problems with non-deterministic conditions: in such a scenario, this rule would change the probability of the output. The PR fixes these problem by: - returning False for the condition when it is Null (in this way we do return all the rows which didn't satisfy it); - avoiding any transformation when the condition is non-deterministic. ## How was this patch tested? added UTs Closes #23315 from mgaido91/SPARK-26366. 
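As a standalone illustration of the bug (a sketch only, not part of this patch; the object name, session setup and the columns `c1`/`c2` are made up), the snippet below builds a DataFrame whose filter condition evaluates to NULL for one row. Without wrapping the rewritten condition in `Coalesce(condition, false)` before negating it, that row is silently dropped from the EXCEPT result; with this change it is returned.

```scala
// Sketch only: shows why NULL must be coalesced to false before negating the
// right-hand filter in ReplaceExceptWithFilter. All names here are hypothetical.
import org.apache.spark.sql.SparkSession

object ExceptNullSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("except-null-sketch")
      .getOrCreate()
    import spark.implicits._

    // One row satisfies the condition, one row makes it evaluate to NULL.
    val df = Seq((Some(0), Some(3)), (Option.empty[Int], Option.empty[Int])).toDF("c1", "c2")
    val filtered = df.filter("c2 >= 3 OR c1 >= 0")

    // For the (null, null) row the condition is NULL, and NOT(NULL) is still NULL, so a
    // plain negated filter would not return it. With the condition wrapped in
    // COALESCE(condition, false) the row is kept, matching real EXCEPT semantics.
    df.except(filtered).show()

    spark.stop()
  }
}
```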
Authored-by: Marco Gaido Signed-off-by: gatorsmile --- .../optimizer/ReplaceExceptWithFilter.scala | 32 ++++++++------ .../optimizer/ReplaceOperatorSuite.scala | 44 ++++++++++++++----- .../org/apache/spark/sql/DatasetSuite.scala | 11 +++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 38 ++++++++++++++++ 4 files changed, 101 insertions(+), 24 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceExceptWithFilter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceExceptWithFilter.scala index efd3944eba7f..4996d24dfd29 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceExceptWithFilter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceExceptWithFilter.scala @@ -36,7 +36,8 @@ import org.apache.spark.sql.catalyst.rules.Rule * Note: * Before flipping the filter condition of the right node, we should: * 1. Combine all it's [[Filter]]. - * 2. Apply InferFiltersFromConstraints rule (to take into account of NULL values in the condition). + * 2. Update the attribute references to the left node; + * 3. Add a Coalesce(condition, False) (to take into account of NULL values in the condition). */ object ReplaceExceptWithFilter extends Rule[LogicalPlan] { @@ -47,23 +48,28 @@ object ReplaceExceptWithFilter extends Rule[LogicalPlan] { plan.transform { case e @ Except(left, right, false) if isEligible(left, right) => - val newCondition = transformCondition(left, skipProject(right)) - newCondition.map { c => - Distinct(Filter(Not(c), left)) - }.getOrElse { + val filterCondition = combineFilters(skipProject(right)).asInstanceOf[Filter].condition + if (filterCondition.deterministic) { + transformCondition(left, filterCondition).map { c => + Distinct(Filter(Not(c), left)) + }.getOrElse { + e + } + } else { e } } } - private def transformCondition(left: LogicalPlan, right: LogicalPlan): Option[Expression] = { - val filterCondition = - InferFiltersFromConstraints(combineFilters(right)).asInstanceOf[Filter].condition - - val attributeNameMap: Map[String, Attribute] = left.output.map(x => (x.name, x)).toMap - - if (filterCondition.references.forall(r => attributeNameMap.contains(r.name))) { - Some(filterCondition.transform { case a: AttributeReference => attributeNameMap(a.name) }) + private def transformCondition(plan: LogicalPlan, condition: Expression): Option[Expression] = { + val attributeNameMap: Map[String, Attribute] = plan.output.map(x => (x.name, x)).toMap + if (condition.references.forall(r => attributeNameMap.contains(r.name))) { + val rewrittenCondition = condition.transform { + case a: AttributeReference => attributeNameMap(a.name) + } + // We need to consider as False when the condition is NULL, otherwise we do not return those + // rows containing NULL which are instead filtered in the Except right plan + Some(Coalesce(Seq(rewrittenCondition, Literal.FalseLiteral))) } else { None } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala index 3b1b2d588ef6..c8e15c7da763 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala @@ -20,11 +20,12 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.Row import 
org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, Not} +import org.apache.spark.sql.catalyst.expressions.{Alias, Coalesce, If, Literal, Not} import org.apache.spark.sql.catalyst.expressions.aggregate.First import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi, PlanTest} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.types.BooleanType class ReplaceOperatorSuite extends PlanTest { @@ -65,8 +66,7 @@ class ReplaceOperatorSuite extends PlanTest { val correctAnswer = Aggregate(table1.output, table1.output, - Filter(Not((attributeA.isNotNull && attributeB.isNotNull) && - (attributeA >= 2 && attributeB < 1)), + Filter(Not(Coalesce(Seq(attributeA >= 2 && attributeB < 1, Literal.FalseLiteral))), Filter(attributeB === 2, Filter(attributeA === 1, table1)))).analyze comparePlans(optimized, correctAnswer) @@ -84,8 +84,8 @@ class ReplaceOperatorSuite extends PlanTest { val correctAnswer = Aggregate(table1.output, table1.output, - Filter(Not((attributeA.isNotNull && attributeB.isNotNull) && - (attributeA >= 2 && attributeB < 1)), table1)).analyze + Filter(Not(Coalesce(Seq(attributeA >= 2 && attributeB < 1, Literal.FalseLiteral))), + table1)).analyze comparePlans(optimized, correctAnswer) } @@ -104,8 +104,7 @@ class ReplaceOperatorSuite extends PlanTest { val correctAnswer = Aggregate(table1.output, table1.output, - Filter(Not((attributeA.isNotNull && attributeB.isNotNull) && - (attributeA >= 2 && attributeB < 1)), + Filter(Not(Coalesce(Seq(attributeA >= 2 && attributeB < 1, Literal.FalseLiteral))), Project(Seq(attributeA, attributeB), table1))).analyze comparePlans(optimized, correctAnswer) @@ -125,8 +124,7 @@ class ReplaceOperatorSuite extends PlanTest { val correctAnswer = Aggregate(table1.output, table1.output, - Filter(Not((attributeA.isNotNull && attributeB.isNotNull) && - (attributeA >= 2 && attributeB < 1)), + Filter(Not(Coalesce(Seq(attributeA >= 2 && attributeB < 1, Literal.FalseLiteral))), Filter(attributeB === 2, Filter(attributeA === 1, table1)))).analyze comparePlans(optimized, correctAnswer) @@ -146,8 +144,7 @@ class ReplaceOperatorSuite extends PlanTest { val correctAnswer = Aggregate(table1.output, table1.output, - Filter(Not((attributeA.isNotNull && attributeB.isNotNull) && - (attributeA === 1 && attributeB === 2)), + Filter(Not(Coalesce(Seq(attributeA === 1 && attributeB === 2, Literal.FalseLiteral))), Project(Seq(attributeA, attributeB), Filter(attributeB < 1, Filter(attributeA >= 2, table1))))).analyze @@ -229,4 +226,29 @@ class ReplaceOperatorSuite extends PlanTest { comparePlans(optimized, query) } + + test("SPARK-26366: ReplaceExceptWithFilter should handle properly NULL") { + val basePlan = LocalRelation(Seq('a.int, 'b.int)) + val otherPlan = basePlan.where('a.in(1, 2) || 'b.in()) + val except = Except(basePlan, otherPlan, false) + val result = OptimizeIn(Optimize.execute(except.analyze)) + val correctAnswer = Aggregate(basePlan.output, basePlan.output, + Filter(!Coalesce(Seq( + 'a.in(1, 2) || If('b.isNotNull, Literal.FalseLiteral, Literal(null, BooleanType)), + Literal.FalseLiteral)), + basePlan)).analyze + comparePlans(result, correctAnswer) + } + + test("SPARK-26366: ReplaceExceptWithFilter should not transform non-detrministic") { + val basePlan = LocalRelation(Seq('a.int, 'b.int)) + val otherPlan = basePlan.where('a > rand(1L)) + val except = Except(basePlan, 
otherPlan, false) + val result = Optimize.execute(except.analyze) + val condition = basePlan.output.zip(otherPlan.output).map { case (a1, a2) => + a1 <=> a2 }.reduce( _ && _) + val correctAnswer = Aggregate(basePlan.output, otherPlan.output, + Join(basePlan, otherPlan, LeftAnti, Option(condition))).analyze + comparePlans(result, correctAnswer) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 525c7cef3956..c90b15814a53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1656,6 +1656,17 @@ class DatasetSuite extends QueryTest with SharedSQLContext { checkAnswer(df.groupBy(col("a")).agg(first(col("b"))), Seq(Row("0", BigDecimal.valueOf(0.1111)), Row("1", BigDecimal.valueOf(1.1111)))) } + + test("SPARK-26366: return nulls which are not filtered in except") { + val inputDF = sqlContext.createDataFrame( + sparkContext.parallelize(Seq(Row("0", "a"), Row("1", null))), + StructType(Seq( + StructField("a", StringType, nullable = true), + StructField("b", StringType, nullable = true)))) + + val exceptDF = inputDF.filter(col("a").isin("0") or col("b") > "c") + checkAnswer(inputDF.except(exceptDF), Seq(Row("1", null))) + } } case class TestDataUnion(x: Int, y: Int, z: Int) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 4cc8a4539199..37a8815350a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2899,6 +2899,44 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } } + + test("SPARK-26366: verify ReplaceExceptWithFilter") { + Seq(true, false).foreach { enabled => + withSQLConf(SQLConf.REPLACE_EXCEPT_WITH_FILTER.key -> enabled.toString) { + val df = spark.createDataFrame( + sparkContext.parallelize(Seq(Row(0, 3, 5), + Row(0, 3, null), + Row(null, 3, 5), + Row(0, null, 5), + Row(0, null, null), + Row(null, null, 5), + Row(null, 3, null), + Row(null, null, null))), + StructType(Seq(StructField("c1", IntegerType), + StructField("c2", IntegerType), + StructField("c3", IntegerType)))) + val where = "c2 >= 3 OR c1 >= 0" + val whereNullSafe = + """ + |(c2 IS NOT NULL AND c2 >= 3) + |OR (c1 IS NOT NULL AND c1 >= 0) + """.stripMargin + + val df_a = df.filter(where) + val df_b = df.filter(whereNullSafe) + checkAnswer(df.except(df_a), df.except(df_b)) + + val whereWithIn = "c2 >= 3 OR c1 in (2)" + val whereWithInNullSafe = + """ + |(c2 IS NOT NULL AND c2 >= 3) + """.stripMargin + val dfIn_a = df.filter(whereWithIn) + val dfIn_b = df.filter(whereWithInNullSafe) + checkAnswer(df.except(dfIn_a), df.except(dfIn_b)) + } + } + } } case class Foo(bar: Option[String]) From 08f74ada3656af401099aa79471ef8a1155a3f07 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 19 Dec 2018 09:41:30 -0800 Subject: [PATCH 0165/1072] [SPARK-26390][SQL] ColumnPruning rule should only do column pruning ## What changes were proposed in this pull request? This is a small clean up. By design catalyst rules should be orthogonal: each rule should have its own responsibility. However, the `ColumnPruning` rule does not only do column pruning, but also remove no-op project and window. This PR updates the `RemoveRedundantProject` rule to remove no-op window as well, and clean up the `ColumnPruning` rule to only do column pruning. 
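As a rough sketch of the new split of responsibilities (illustrative only, not part of this patch; the object name is made up and the snippet mirrors the `RuleExecutor` pattern used by the optimizer test suites), applying only the renamed rule should be enough to eliminate a no-op Project:

```scala
// Sketch: run RemoveNoopOperators in isolation and check that a Project which merely
// re-selects the child's output is removed. Object name is hypothetical.
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.optimizer.RemoveNoopOperators
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

object RemoveNoopOperatorsSketch extends RuleExecutor[LogicalPlan] {
  val batches = Batch("RemoveNoopOperators", Once, RemoveNoopOperators) :: Nil

  def main(args: Array[String]): Unit = {
    val relation = LocalRelation('a.int, 'b.int)
    // Selecting exactly the child's output adds nothing, so the Project is a no-op.
    val query = relation.select('a, 'b).analyze
    val optimized = execute(query)
    assert(optimized == relation, s"expected the no-op Project to be removed, got:\n$optimized")
  }
}
```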
## How was this patch tested? existing tests Closes #23343 from cloud-fan/column-pruning. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/Optimizer.scala | 23 +++++++++---------- .../optimizer/ColumnPruningSuite.scala | 7 +++--- .../optimizer/CombiningLimitsSuite.scala | 5 ++-- .../optimizer/JoinOptimizationSuite.scala | 1 + .../RemoveRedundantAliasAndProjectSuite.scala | 2 +- .../optimizer/RewriteSubquerySuite.scala | 2 +- .../optimizer/TransposeWindowSuite.scala | 2 +- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 3eb6bca6ec97..44d554311490 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -93,7 +93,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) RewriteCorrelatedScalarSubquery, EliminateSerialization, RemoveRedundantAliases, - RemoveRedundantProject, + RemoveNoopOperators, SimplifyExtractValueOps, CombineConcats) ++ extendedOperatorOptimizationRules @@ -177,7 +177,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) RewritePredicateSubquery, ColumnPruning, CollapseProject, - RemoveRedundantProject) :+ + RemoveNoopOperators) :+ Batch("UpdateAttributeReferences", Once, UpdateNullabilityInAttributeReferences) } @@ -403,11 +403,15 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { } /** - * Remove projections from the query plan that do not make any modifications. + * Remove no-op operators from the query plan that do not make any modifications. */ -object RemoveRedundantProject extends Rule[LogicalPlan] { +object RemoveNoopOperators extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case p @ Project(_, child) if p.output == child.output => child + // Eliminate no-op Projects + case p @ Project(_, child) if child.sameOutput(p) => child + + // Eliminate no-op Window + case w: Window if w.windowExpressions.isEmpty => w.child } } @@ -602,17 +606,12 @@ object ColumnPruning extends Rule[LogicalPlan] { p.copy(child = w.copy( windowExpressions = w.windowExpressions.filter(p.references.contains))) - // Eliminate no-op Window - case w: Window if w.windowExpressions.isEmpty => w.child - - // Eliminate no-op Projects - case p @ Project(_, child) if child.sameOutput(p) => child - // Can't prune the columns on LeafNode case p @ Project(_, _: LeafNode) => p // for all other logical plans that inherits the output from it's children - case p @ Project(_, child) => + // Project over project is handled by the first case, skip it here. 
+ case p @ Project(_, child) if !child.isInstanceOf[Project] => val required = child.references ++ p.references if (!child.inputSet.subsetOf(required)) { val newChildren = child.children.map(c => prunedChild(c, required)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala index 8d7c9bf220bc..57195d5fda7c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala @@ -34,6 +34,7 @@ class ColumnPruningSuite extends PlanTest { val batches = Batch("Column pruning", FixedPoint(100), PushDownPredicate, ColumnPruning, + RemoveNoopOperators, CollapseProject) :: Nil } @@ -340,10 +341,8 @@ class ColumnPruningSuite extends PlanTest { test("Column pruning on Union") { val input1 = LocalRelation('a.int, 'b.string, 'c.double) val input2 = LocalRelation('c.int, 'd.string, 'e.double) - val query = Project('b :: Nil, - Union(input1 :: input2 :: Nil)).analyze - val expected = Project('b :: Nil, - Union(Project('b :: Nil, input1) :: Project('d :: Nil, input2) :: Nil)).analyze + val query = Project('b :: Nil, Union(input1 :: input2 :: Nil)).analyze + val expected = Union(Project('b :: Nil, input1) :: Project('d :: Nil, input2) :: Nil).analyze comparePlans(Optimize.execute(query), expected) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index ef4b848924f0..b190dd5a7c22 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -27,8 +27,9 @@ class CombiningLimitsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = - Batch("Filter Pushdown", FixedPoint(100), - ColumnPruning) :: + Batch("Column Pruning", FixedPoint(100), + ColumnPruning, + RemoveNoopOperators) :: Batch("Combine Limit", FixedPoint(10), CombineLimits) :: Batch("Constant Folding", FixedPoint(10), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala index e9438b2eee55..6fe5e619d03a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala @@ -39,6 +39,7 @@ class JoinOptimizationSuite extends PlanTest { ReorderJoin, PushPredicateThroughJoin, ColumnPruning, + RemoveNoopOperators, CollapseProject) :: Nil } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala index 1973b5abb462..3802dbf5d6e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala @@ -33,7 +33,7 @@ class RemoveRedundantAliasAndProjectSuite extends PlanTest with PredicateHelper FixedPoint(50), 
PushProjectionThroughUnion, RemoveRedundantAliases, - RemoveRedundantProject) :: Nil + RemoveNoopOperators) :: Nil } test("all expressions in project list are aliased child output") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala index 6b3739c372c3..f00d22e6e96a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala @@ -34,7 +34,7 @@ class RewriteSubquerySuite extends PlanTest { RewritePredicateSubquery, ColumnPruning, CollapseProject, - RemoveRedundantProject) :: Nil + RemoveNoopOperators) :: Nil } test("Column pruning after rewriting predicate subquery") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala index 58b3d1c98f3c..4acd57832d2f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.rules.RuleExecutor class TransposeWindowSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = - Batch("CollapseProject", FixedPoint(100), CollapseProject, RemoveRedundantProject) :: + Batch("CollapseProject", FixedPoint(100), CollapseProject, RemoveNoopOperators) :: Batch("FlipWindow", Once, CollapseWindow, TransposeWindow) :: Nil } From 61c443acd23c74ebcb20fd32e5e0ed6c1722b5dc Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Thu, 20 Dec 2018 10:41:45 +0800 Subject: [PATCH 0166/1072] [SPARK-26262][SQL] Runs SQLQueryTestSuite on mixed config sets: WHOLESTAGE_CODEGEN_ENABLED and CODEGEN_FACTORY_MODE ## What changes were proposed in this pull request? For better test coverage, this pr proposed to use the 4 mixed config sets of `WHOLESTAGE_CODEGEN_ENABLED` and `CODEGEN_FACTORY_MODE` when running `SQLQueryTestSuite`: 1. WHOLESTAGE_CODEGEN_ENABLED=true, CODEGEN_FACTORY_MODE=CODEGEN_ONLY 2. WHOLESTAGE_CODEGEN_ENABLED=false, CODEGEN_FACTORY_MODE=CODEGEN_ONLY 3. WHOLESTAGE_CODEGEN_ENABLED=true, CODEGEN_FACTORY_MODE=NO_CODEGEN 4. WHOLESTAGE_CODEGEN_ENABLED=false, CODEGEN_FACTORY_MODE=NO_CODEGEN This pr also moved some existing tests into `ExplainSuite` because explain output results are different between codegen and interpreter modes. ## How was this patch tested? Existing tests. Closes #23213 from maropu/InterpreterModeTest. 
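For reference, a hypothetical standalone suite (a sketch only, not part of this patch; the suite name and query are made up) can exercise the same four-way config matrix directly through `withSQLConf`:

```scala
// Sketch of running one query under all four combinations of whole-stage codegen and
// codegen factory mode. Suite name and query are hypothetical.
import org.apache.spark.sql.{QueryTest, Row}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext

class CodegenConfigMatrixSketchSuite extends QueryTest with SharedSQLContext {

  test("query result is stable across codegen config combinations") {
    for {
      wholeStage <- Seq("true", "false")
      factoryMode <- Seq("CODEGEN_ONLY", "NO_CODEGEN")
    } {
      withSQLConf(
          SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> wholeStage,
          SQLConf.CODEGEN_FACTORY_MODE.key -> factoryMode) {
        // The answer must not depend on which code path evaluates the expressions.
        checkAnswer(sql("SELECT id * 2 FROM range(3)"), Seq(Row(0L), Row(2L), Row(4L)))
      }
    }
  }
}
```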
Authored-by: Takeshi Yamamuro Signed-off-by: Wenchen Fan --- .../resources/sql-tests/inputs/group-by.sql | 5 - .../sql-tests/inputs/inline-table.sql | 3 - .../resources/sql-tests/inputs/operators.sql | 21 -- .../inputs/sql-compatibility-functions.sql | 5 - .../sql-tests/inputs/string-functions.sql | 27 --- .../inputs/table-valued-functions.sql | 6 - .../sql-tests/results/group-by.sql.out | 30 +-- .../sql-tests/results/inline-table.sql.out | 32 +-- .../sql-tests/results/operators.sql.out | 204 +++++++----------- .../sql-compatibility-functions.sql.out | 61 ++---- .../results/string-functions.sql.out | 131 +++-------- .../results/table-valued-functions.sql.out | 41 +--- .../org/apache/spark/sql/ExplainSuite.scala | 133 +++++++++++- .../apache/spark/sql/SQLQueryTestSuite.scala | 51 ++--- 14 files changed, 281 insertions(+), 469 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index ec263ea70bd4..7e81ff1aba37 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -141,8 +141,3 @@ SELECT every("true"); SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; - --- simple explain of queries having every/some/any agregates. Optimized --- plan should show the rewritten aggregate expression. -EXPLAIN EXTENDED SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k; - diff --git a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql index 41d316444ed6..b3ec956cd178 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql @@ -49,6 +49,3 @@ select * from values ("one", count(1)), ("two", 2) as data(a, b); -- string to timestamp select * from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991-12-06 01:00:00.0'), timestamp('1991-12-06 12:00:00.0'))) as data(a, b); - --- cross-join inline tables -EXPLAIN EXTENDED SELECT * FROM VALUES ('one', 1), ('three', null) CROSS JOIN VALUES ('one', 1), ('three', null); diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql index 37f9cd44da7f..ba14789d48db 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql @@ -29,27 +29,6 @@ select 2 * 5; select 5 % 3; select pmod(-7, 3); --- check operator precedence. 
--- We follow Oracle operator precedence in the table below that lists the levels of precedence --- among SQL operators from high to low: ------------------------------------------------------------------------------------------- --- Operator Operation ------------------------------------------------------------------------------------------- --- +, - identity, negation --- *, / multiplication, division --- +, -, || addition, subtraction, concatenation --- =, !=, <, >, <=, >=, IS NULL, LIKE, BETWEEN, IN comparison --- NOT exponentiation, logical negation --- AND conjunction --- OR disjunction ------------------------------------------------------------------------------------------- -explain select 'a' || 1 + 2; -explain select 1 - 2 || 'b'; -explain select 2 * 4 + 3 || 'b'; -explain select 3 + 1 || 'a' || 4 / 2; -explain select 1 == 1 OR 'a' || 'b' == 'ab'; -explain select 'a' || 'c' == 'ac' AND 2 == 3; - -- math functions select cot(1); select cot(null); diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-compatibility-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-compatibility-functions.sql index f1461032065a..1ae49c8bfc76 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/sql-compatibility-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-compatibility-functions.sql @@ -12,11 +12,6 @@ SELECT nullif(1, 2.1d), nullif(1, 1.0d); SELECT nvl(1, 2.1d), nvl(null, 2.1d); SELECT nvl2(null, 1, 2.1d), nvl2('n', 1, 2.1d); --- explain for these functions; use range to avoid constant folding -explain extended -select ifnull(id, 'x'), nullif(id, 'x'), nvl(id, 'x'), nvl2(id, 'x', 'y') -from range(2); - -- SPARK-16730 cast alias functions for Hive compatibility SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1); SELECT float(1), double(1), decimal(1); diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 2effb43183d7..fbc231627e36 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -5,10 +5,6 @@ select format_string(); -- A pipe operator for string concatenation select 'a' || 'b' || 'c'; --- Check if catalyst combine nested `Concat`s -EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col -FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)); - -- replace function select replace('abc', 'b', '123'); select replace('abc', 'b'); @@ -25,29 +21,6 @@ select left(null, -2), left("abcd", -2), left("abcd", 0), left("abcd", 'a'); select right("abcd", 2), right("abcd", 5), right("abcd", '2'), right("abcd", null); select right(null, -2), right("abcd", -2), right("abcd", 0), right("abcd", 'a'); --- turn off concatBinaryAsString -set spark.sql.function.concatBinaryAsString=false; - --- Check if catalyst combine nested `Concat`s if concatBinaryAsString=false -EXPLAIN SELECT ((col1 || col2) || (col3 || col4)) col -FROM ( - SELECT - string(id) col1, - string(id + 1) col2, - encode(string(id + 2), 'utf-8') col3, - encode(string(id + 3), 'utf-8') col4 - FROM range(10) -); - -EXPLAIN SELECT (col1 || (col3 || col4)) col -FROM ( - SELECT - string(id) col1, - encode(string(id + 2), 'utf-8') col3, - encode(string(id + 3), 'utf-8') col4 - FROM range(10) -); - -- split function SELECT split('aa1cc2ee3', '[1-9]+'); SELECT split('aa1cc2ee3', '[1-9]+', 2); diff --git a/sql/core/src/test/resources/sql-tests/inputs/table-valued-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/table-valued-functions.sql index 72cd8ca9d872..6f14c8ca8782 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/table-valued-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/table-valued-functions.sql @@ -21,9 +21,3 @@ select * from range(1, null); -- range call with a mixed-case function name select * from RaNgE(2); - --- Explain -EXPLAIN select * from RaNgE(2); - --- cross-join table valued functions -EXPLAIN EXTENDED SELECT * FROM range(3) CROSS JOIN range(3); diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 9a8d025331b6..daf47c4d0a39 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 47 +-- Number of queries: 46 -- !query 0 @@ -459,31 +459,3 @@ struct --- !query 46 output -== Parsed Logical Plan == -'Aggregate ['k], ['k, unresolvedalias('every('v), None), unresolvedalias('some('v), None), unresolvedalias('any('v), None)] -+- 'UnresolvedRelation `test_agg` - -== Analyzed Logical Plan == -k: int, every(v): boolean, some(v): boolean, any(v): boolean -Aggregate [k#x], [k#x, every(v#x) AS every(v)#x, some(v#x) AS some(v)#x, any(v#x) AS any(v)#x] -+- SubqueryAlias `test_agg` - +- Project [k#x, v#x] - +- SubqueryAlias `test_agg` - +- LocalRelation [k#x, v#x] - -== Optimized Logical Plan == -Aggregate [k#x], [k#x, min(v#x) AS every(v)#x, max(v#x) AS some(v)#x, max(v#x) AS any(v)#x] -+- LocalRelation [k#x, v#x] - -== Physical Plan == -*HashAggregate(keys=[k#x], functions=[min(v#x), max(v#x)], output=[k#x, every(v)#x, some(v)#x, any(v)#x]) -+- Exchange hashpartitioning(k#x, 200) - +- *HashAggregate(keys=[k#x], functions=[partial_min(v#x), partial_max(v#x)], output=[k#x, min#x, max#x]) - +- LocalTableScan [k#x, v#x] diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out index c065ce501292..4e80f0bda551 100644 --- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 18 +-- Number of queries: 17 -- !query 0 @@ -151,33 +151,3 @@ select * from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991- struct> -- !query 16 output 1991-12-06 00:00:00 [1991-12-06 01:00:00.0,1991-12-06 12:00:00.0] - - --- !query 17 -EXPLAIN EXTENDED SELECT * FROM VALUES ('one', 1), ('three', null) CROSS JOIN VALUES ('one', 1), ('three', null) --- !query 17 schema -struct --- !query 17 output -== Parsed Logical Plan == -'Project [*] -+- 'Join Cross - :- 'UnresolvedInlineTable [col1, col2], [List(one, 1), List(three, null)] - +- 'UnresolvedInlineTable [col1, col2], [List(one, 1), List(three, null)] - -== Analyzed Logical Plan == -col1: string, col2: int, col1: string, col2: int -Project [col1#x, col2#x, col1#x, col2#x] -+- Join Cross - :- LocalRelation [col1#x, col2#x] - +- LocalRelation [col1#x, col2#x] - -== Optimized Logical Plan == -Join Cross -:- LocalRelation [col1#x, col2#x] -+- LocalRelation [col1#x, col2#x] - -== Physical Plan == -BroadcastNestedLoopJoin BuildRight, Cross -:- LocalTableScan [col1#x, col2#x] -+- BroadcastExchange IdentityBroadcastMode - +- LocalTableScan [col1#x, col2#x] diff --git 
a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index 570b281353f3..e0cbd575bc34 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 55 +-- Number of queries: 49 -- !query 0 @@ -195,260 +195,200 @@ struct -- !query 24 -explain select 'a' || 1 + 2 +select cot(1) -- !query 24 schema -struct +struct -- !query 24 output -== Physical Plan == -*Project [null AS (CAST(concat(a, CAST(1 AS STRING)) AS DOUBLE) + CAST(2 AS DOUBLE))#x] -+- *Scan OneRowRelation[] +0.6420926159343306 -- !query 25 -explain select 1 - 2 || 'b' +select cot(null) -- !query 25 schema -struct +struct -- !query 25 output -== Physical Plan == -*Project [-1b AS concat(CAST((1 - 2) AS STRING), b)#x] -+- *Scan OneRowRelation[] +NULL -- !query 26 -explain select 2 * 4 + 3 || 'b' +select cot(0) -- !query 26 schema -struct +struct -- !query 26 output -== Physical Plan == -*Project [11b AS concat(CAST(((2 * 4) + 3) AS STRING), b)#x] -+- *Scan OneRowRelation[] +Infinity -- !query 27 -explain select 3 + 1 || 'a' || 4 / 2 +select cot(-1) -- !query 27 schema -struct +struct -- !query 27 output -== Physical Plan == -*Project [4a2.0 AS concat(concat(CAST((3 + 1) AS STRING), a), CAST((CAST(4 AS DOUBLE) / CAST(2 AS DOUBLE)) AS STRING))#x] -+- *Scan OneRowRelation[] +-0.6420926159343306 -- !query 28 -explain select 1 == 1 OR 'a' || 'b' == 'ab' +select ceiling(0) -- !query 28 schema -struct +struct -- !query 28 output -== Physical Plan == -*Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x] -+- *Scan OneRowRelation[] +0 -- !query 29 -explain select 'a' || 'c' == 'ac' AND 2 == 3 +select ceiling(1) -- !query 29 schema -struct +struct -- !query 29 output -== Physical Plan == -*Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x] -+- *Scan OneRowRelation[] +1 -- !query 30 -select cot(1) +select ceil(1234567890123456) -- !query 30 schema -struct +struct -- !query 30 output -0.6420926159343306 +1234567890123456 -- !query 31 -select cot(null) +select ceiling(1234567890123456) -- !query 31 schema -struct +struct -- !query 31 output -NULL +1234567890123456 -- !query 32 -select cot(0) +select ceil(0.01) -- !query 32 schema -struct +struct -- !query 32 output -Infinity +1 -- !query 33 -select cot(-1) +select ceiling(-0.10) -- !query 33 schema -struct +struct -- !query 33 output --0.6420926159343306 +0 -- !query 34 -select ceiling(0) +select floor(0) -- !query 34 schema -struct +struct -- !query 34 output 0 -- !query 35 -select ceiling(1) +select floor(1) -- !query 35 schema -struct +struct -- !query 35 output 1 -- !query 36 -select ceil(1234567890123456) +select floor(1234567890123456) -- !query 36 schema -struct +struct -- !query 36 output 1234567890123456 -- !query 37 -select ceiling(1234567890123456) --- !query 37 schema -struct --- !query 37 output -1234567890123456 - - --- !query 38 -select ceil(0.01) --- !query 38 schema -struct --- !query 38 output -1 - - --- !query 39 -select ceiling(-0.10) --- !query 39 schema -struct --- !query 39 output -0 - - --- !query 40 -select floor(0) --- !query 40 schema -struct --- !query 40 output -0 - - --- !query 41 -select floor(1) --- !query 41 schema -struct --- !query 41 output -1 - - --- !query 42 -select floor(1234567890123456) --- !query 42 schema -struct --- !query 42 output -1234567890123456 - - --- !query 43 select floor(0.01) --- !query 43 
schema +-- !query 37 schema struct --- !query 43 output +-- !query 37 output 0 --- !query 44 +-- !query 38 select floor(-0.10) --- !query 44 schema +-- !query 38 schema struct --- !query 44 output +-- !query 38 output -1 --- !query 45 +-- !query 39 select 1 > 0.00001 --- !query 45 schema +-- !query 39 schema struct<(CAST(1 AS BIGINT) > 0):boolean> --- !query 45 output +-- !query 39 output true --- !query 46 +-- !query 40 select mod(7, 2), mod(7, 0), mod(0, 2), mod(7, null), mod(null, 2), mod(null, null) --- !query 46 schema +-- !query 40 schema struct<(7 % 2):int,(7 % 0):int,(0 % 2):int,(7 % CAST(NULL AS INT)):int,(CAST(NULL AS INT) % 2):int,(CAST(NULL AS DOUBLE) % CAST(NULL AS DOUBLE)):double> --- !query 46 output +-- !query 40 output 1 NULL 0 NULL NULL NULL --- !query 47 +-- !query 41 select BIT_LENGTH('abc') --- !query 47 schema +-- !query 41 schema struct --- !query 47 output +-- !query 41 output 24 --- !query 48 +-- !query 42 select CHAR_LENGTH('abc') --- !query 48 schema +-- !query 42 schema struct --- !query 48 output +-- !query 42 output 3 --- !query 49 +-- !query 43 select CHARACTER_LENGTH('abc') --- !query 49 schema +-- !query 43 schema struct --- !query 49 output +-- !query 43 output 3 --- !query 50 +-- !query 44 select OCTET_LENGTH('abc') --- !query 50 schema +-- !query 44 schema struct --- !query 50 output +-- !query 44 output 3 --- !query 51 +-- !query 45 select abs(-3.13), abs('-2.19') --- !query 51 schema +-- !query 45 schema struct --- !query 51 output +-- !query 45 output 3.13 2.19 --- !query 52 +-- !query 46 select positive('-1.11'), positive(-1.11), negative('-1.11'), negative(-1.11) --- !query 52 schema +-- !query 46 schema struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),(- CAST(-1.11 AS DOUBLE)):double,(- -1.11):decimal(3,2)> --- !query 52 output +-- !query 46 output -1.11 -1.11 1.11 1.11 --- !query 53 +-- !query 47 select pmod(-7, 2), pmod(0, 2), pmod(7, 0), pmod(7, null), pmod(null, 2), pmod(null, null) --- !query 53 schema +-- !query 47 schema struct --- !query 53 output +-- !query 47 output 1 0 NULL NULL NULL NULL --- !query 54 +-- !query 48 select pmod(cast(3.13 as decimal), cast(0 as decimal)), pmod(cast(2 as smallint), cast(0 as smallint)) --- !query 54 schema +-- !query 48 schema struct --- !query 54 output +-- !query 48 output NULL NULL diff --git a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out index e035505f15d2..69a8e958000d 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 15 +-- Number of queries: 14 -- !query 0 @@ -67,74 +67,49 @@ struct -- !query 8 -explain extended -select ifnull(id, 'x'), nullif(id, 'x'), nvl(id, 'x'), nvl2(id, 'x', 'y') -from range(2) --- !query 8 schema -struct --- !query 8 output -== Parsed Logical Plan == -'Project [unresolvedalias('ifnull('id, x), None), unresolvedalias('nullif('id, x), None), unresolvedalias('nvl('id, x), None), unresolvedalias('nvl2('id, x, y), None)] -+- 'UnresolvedTableValuedFunction range, [2] - -== Analyzed Logical Plan == -ifnull(`id`, 'x'): string, nullif(`id`, 'x'): bigint, nvl(`id`, 'x'): string, nvl2(`id`, 'x', 'y'): string -Project [ifnull(id#xL, x) AS ifnull(`id`, 'x')#x, nullif(id#xL, x) AS nullif(`id`, 'x')#xL, nvl(id#xL, x) AS nvl(`id`, 'x')#x, 
nvl2(id#xL, x, y) AS nvl2(`id`, 'x', 'y')#x] -+- Range (0, 2, step=1, splits=None) - -== Optimized Logical Plan == -Project [coalesce(cast(id#xL as string), x) AS ifnull(`id`, 'x')#x, id#xL AS nullif(`id`, 'x')#xL, coalesce(cast(id#xL as string), x) AS nvl(`id`, 'x')#x, x AS nvl2(`id`, 'x', 'y')#x] -+- Range (0, 2, step=1, splits=None) - -== Physical Plan == -*Project [coalesce(cast(id#xL as string), x) AS ifnull(`id`, 'x')#x, id#xL AS nullif(`id`, 'x')#xL, coalesce(cast(id#xL as string), x) AS nvl(`id`, 'x')#x, x AS nvl2(`id`, 'x', 'y')#x] -+- *Range (0, 2, step=1, splits=2) - - --- !query 9 SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1) --- !query 9 schema +-- !query 8 schema struct --- !query 9 output +-- !query 8 output true 1 1 1 1 --- !query 10 +-- !query 9 SELECT float(1), double(1), decimal(1) --- !query 10 schema +-- !query 9 schema struct --- !query 10 output +-- !query 9 output 1.0 1.0 1 --- !query 11 +-- !query 10 SELECT date("2014-04-04"), timestamp(date("2014-04-04")) --- !query 11 schema +-- !query 10 schema struct --- !query 11 output +-- !query 10 output 2014-04-04 2014-04-04 00:00:00 --- !query 12 +-- !query 11 SELECT string(1, 2) --- !query 12 schema +-- !query 11 schema struct<> --- !query 12 output +-- !query 11 output org.apache.spark.sql.AnalysisException Function string accepts only one argument; line 1 pos 7 --- !query 13 +-- !query 12 CREATE TEMPORARY VIEW tempView1 AS VALUES (1, NAMED_STRUCT('col1', 'gamma', 'col2', 'delta')) AS T(id, st) --- !query 13 schema +-- !query 12 schema struct<> --- !query 13 output +-- !query 12 output --- !query 14 +-- !query 13 SELECT nvl(st.col1, "value"), count(*) FROM from tempView1 GROUP BY nvl(st.col1, "value") --- !query 14 schema +-- !query 13 schema struct --- !query 14 output +-- !query 13 output gamma 1 diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index e8f2e0a81455..25d93b206314 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 17 +-- Number of queries: 13 -- !query 0 @@ -29,151 +29,80 @@ abc -- !query 3 -EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col -FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) --- !query 3 schema -struct --- !query 3 output -== Parsed Logical Plan == -'Project [concat(concat(concat('col1, 'col2), 'col3), 'col4) AS col#x] -+- 'SubqueryAlias `__auto_generated_subquery_name` - +- 'Project ['id AS col1#x, 'id AS col2#x, 'id AS col3#x, 'id AS col4#x] - +- 'UnresolvedTableValuedFunction range, [10] - -== Analyzed Logical Plan == -col: string -Project [concat(concat(concat(cast(col1#xL as string), cast(col2#xL as string)), cast(col3#xL as string)), cast(col4#xL as string)) AS col#x] -+- SubqueryAlias `__auto_generated_subquery_name` - +- Project [id#xL AS col1#xL, id#xL AS col2#xL, id#xL AS col3#xL, id#xL AS col4#xL] - +- Range (0, 10, step=1, splits=None) - -== Optimized Logical Plan == -Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x] -+- Range (0, 10, step=1, splits=None) - -== Physical Plan == -*Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x] -+- *Range (0, 10, step=1, splits=2) - - --- !query 4 select replace('abc', 
'b', '123') --- !query 4 schema +-- !query 3 schema struct --- !query 4 output +-- !query 3 output a123c --- !query 5 +-- !query 4 select replace('abc', 'b') --- !query 5 schema +-- !query 4 schema struct --- !query 5 output +-- !query 4 output ac --- !query 6 +-- !query 5 select length(uuid()), (uuid() <> uuid()) --- !query 6 schema +-- !query 5 schema struct --- !query 6 output +-- !query 5 output 36 true --- !query 7 +-- !query 6 select position('bar' in 'foobarbar'), position(null, 'foobarbar'), position('aaads', null) --- !query 7 schema +-- !query 6 schema struct --- !query 7 output +-- !query 6 output 4 NULL NULL --- !query 8 +-- !query 7 select left("abcd", 2), left("abcd", 5), left("abcd", '2'), left("abcd", null) --- !query 8 schema +-- !query 7 schema struct --- !query 8 output +-- !query 7 output ab abcd ab NULL --- !query 9 +-- !query 8 select left(null, -2), left("abcd", -2), left("abcd", 0), left("abcd", 'a') --- !query 9 schema +-- !query 8 schema struct --- !query 9 output +-- !query 8 output NULL NULL --- !query 10 +-- !query 9 select right("abcd", 2), right("abcd", 5), right("abcd", '2'), right("abcd", null) --- !query 10 schema +-- !query 9 schema struct --- !query 10 output +-- !query 9 output cd abcd cd NULL --- !query 11 +-- !query 10 select right(null, -2), right("abcd", -2), right("abcd", 0), right("abcd", 'a') --- !query 11 schema +-- !query 10 schema struct --- !query 11 output +-- !query 10 output NULL NULL --- !query 12 -set spark.sql.function.concatBinaryAsString=false --- !query 12 schema -struct --- !query 12 output -spark.sql.function.concatBinaryAsString false - - --- !query 13 -EXPLAIN SELECT ((col1 || col2) || (col3 || col4)) col -FROM ( - SELECT - string(id) col1, - string(id + 1) col2, - encode(string(id + 2), 'utf-8') col3, - encode(string(id + 3), 'utf-8') col4 - FROM range(10) -) --- !query 13 schema -struct --- !query 13 output -== Physical Plan == -*Project [concat(cast(id#xL as string), cast((id#xL + 1) as string), cast(encode(cast((id#xL + 2) as string), utf-8) as string), cast(encode(cast((id#xL + 3) as string), utf-8) as string)) AS col#x] -+- *Range (0, 10, step=1, splits=2) - - --- !query 14 -EXPLAIN SELECT (col1 || (col3 || col4)) col -FROM ( - SELECT - string(id) col1, - encode(string(id + 2), 'utf-8') col3, - encode(string(id + 3), 'utf-8') col4 - FROM range(10) -) --- !query 14 schema -struct --- !query 14 output -== Physical Plan == -*Project [concat(cast(id#xL as string), cast(encode(cast((id#xL + 2) as string), utf-8) as string), cast(encode(cast((id#xL + 3) as string), utf-8) as string)) AS col#x] -+- *Range (0, 10, step=1, splits=2) - - --- !query 15 +-- !query 11 SELECT split('aa1cc2ee3', '[1-9]+') --- !query 15 schema +-- !query 11 schema struct> --- !query 15 output +-- !query 11 output ["aa","cc","ee",""] --- !query 16 +-- !query 12 SELECT split('aa1cc2ee3', '[1-9]+', 2) --- !query 16 schema +-- !query 12 schema struct> --- !query 16 output +-- !query 12 output ["aa","cc2ee3"] diff --git a/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out index 94af9181225d..fdbea0ee9072 100644 --- a/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 10 +-- Number of queries: 8 -- !query 0 @@ -99,42 +99,3 @@ struct -- !query 7 output 0 1 - - --- 
!query 8 -EXPLAIN select * from RaNgE(2) --- !query 8 schema -struct --- !query 8 output -== Physical Plan == -*Range (0, 2, step=1, splits=2) - - --- !query 9 -EXPLAIN EXTENDED SELECT * FROM range(3) CROSS JOIN range(3) --- !query 9 schema -struct --- !query 9 output -== Parsed Logical Plan == -'Project [*] -+- 'Join Cross - :- 'UnresolvedTableValuedFunction range, [3] - +- 'UnresolvedTableValuedFunction range, [3] - -== Analyzed Logical Plan == -id: bigint, id: bigint -Project [id#xL, id#xL] -+- Join Cross - :- Range (0, 3, step=1, splits=None) - +- Range (0, 3, step=1, splits=None) - -== Optimized Logical Plan == -Join Cross -:- Range (0, 3, step=1, splits=None) -+- Range (0, 3, step=1, splits=None) - -== Physical Plan == -BroadcastNestedLoopJoin BuildRight, Cross -:- *Range (0, 3, step=1, splits=2) -+- BroadcastExchange IdentityBroadcastMode - +- *Range (0, 3, step=1, splits=2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 56d300e30a58..ce475922eb5e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types.StructType @@ -29,10 +30,11 @@ class ExplainSuite extends QueryTest with SharedSQLContext { private def checkKeywordsExistsInExplain(df: DataFrame, keywords: String*): Unit = { val output = new java.io.ByteArrayOutputStream() Console.withOut(output) { - df.explain(extended = false) + df.explain(extended = true) } + val normalizedOutput = output.toString.replaceAll("#\\d+", "#x") for (key <- keywords) { - assert(output.toString.contains(key)) + assert(normalizedOutput.contains(key)) } } @@ -53,6 +55,133 @@ class ExplainSuite extends QueryTest with SharedSQLContext { checkKeywordsExistsInExplain(df, keywords = "InMemoryRelation", "StorageLevel(disk, memory, deserialized, 1 replicas)") } + + test("optimized plan should show the rewritten aggregate expression") { + withTempView("test_agg") { + sql( + """ + |CREATE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES + | (1, true), (1, false), + | (2, true), + | (3, false), (3, null), + | (4, null), (4, null), + | (5, null), (5, true), (5, false) AS test_agg(k, v) + """.stripMargin) + + // simple explain of queries having every/some/any aggregates. Optimized + // plan should show the rewritten aggregate expression. 
+ val df = sql("SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k") + checkKeywordsExistsInExplain(df, + "Aggregate [k#x], [k#x, min(v#x) AS every(v)#x, max(v#x) AS some(v)#x, " + + "max(v#x) AS any(v)#x]") + } + } + + test("explain inline tables cross-joins") { + val df = sql( + """ + |SELECT * FROM VALUES ('one', 1), ('three', null) + | CROSS JOIN VALUES ('one', 1), ('three', null) + """.stripMargin) + checkKeywordsExistsInExplain(df, + "Join Cross", + ":- LocalRelation [col1#x, col2#x]", + "+- LocalRelation [col1#x, col2#x]") + } + + test("explain table valued functions") { + checkKeywordsExistsInExplain(sql("select * from RaNgE(2)"), "Range (0, 2, step=1, splits=None)") + checkKeywordsExistsInExplain(sql("SELECT * FROM range(3) CROSS JOIN range(3)"), + "Join Cross", + ":- Range (0, 3, step=1, splits=None)", + "+- Range (0, 3, step=1, splits=None)") + } + + test("explain string functions") { + // Check if catalyst combine nested `Concat`s + val df1 = sql( + """ + |SELECT (col1 || col2 || col3 || col4) col + | FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) + """.stripMargin) + checkKeywordsExistsInExplain(df1, + "Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)" + + ", cast(id#xL as string)) AS col#x]") + + // Check if catalyst combine nested `Concat`s if concatBinaryAsString=false + withSQLConf(SQLConf.CONCAT_BINARY_AS_STRING.key -> "false") { + val df2 = sql( + """ + |SELECT ((col1 || col2) || (col3 || col4)) col + |FROM ( + | SELECT + | string(id) col1, + | string(id + 1) col2, + | encode(string(id + 2), 'utf-8') col3, + | encode(string(id + 3), 'utf-8') col4 + | FROM range(10) + |) + """.stripMargin) + checkKeywordsExistsInExplain(df2, + "Project [concat(cast(id#xL as string), cast((id#xL + 1) as string), " + + "cast(encode(cast((id#xL + 2) as string), utf-8) as string), " + + "cast(encode(cast((id#xL + 3) as string), utf-8) as string)) AS col#x]") + + val df3 = sql( + """ + |SELECT (col1 || (col3 || col4)) col + |FROM ( + | SELECT + | string(id) col1, + | encode(string(id + 2), 'utf-8') col3, + | encode(string(id + 3), 'utf-8') col4 + | FROM range(10) + |) + """.stripMargin) + checkKeywordsExistsInExplain(df3, + "Project [concat(cast(id#xL as string), " + + "cast(encode(cast((id#xL + 2) as string), utf-8) as string), " + + "cast(encode(cast((id#xL + 3) as string), utf-8) as string)) AS col#x]") + } + } + + test("check operator precedence") { + // We follow Oracle operator precedence in the table below that lists the levels + // of precedence among SQL operators from high to low: + // --------------------------------------------------------------------------------------- + // Operator Operation + // --------------------------------------------------------------------------------------- + // +, - identity, negation + // *, / multiplication, division + // +, -, || addition, subtraction, concatenation + // =, !=, <, >, <=, >=, IS NULL, LIKE, BETWEEN, IN comparison + // NOT exponentiation, logical negation + // AND conjunction + // OR disjunction + // --------------------------------------------------------------------------------------- + checkKeywordsExistsInExplain(sql("select 'a' || 1 + 2"), + "Project [null AS (CAST(concat(a, CAST(1 AS STRING)) AS DOUBLE) + CAST(2 AS DOUBLE))#x]") + checkKeywordsExistsInExplain(sql("select 1 - 2 || 'b'"), + "Project [-1b AS concat(CAST((1 - 2) AS STRING), b)#x]") + checkKeywordsExistsInExplain(sql("select 2 * 4 + 3 || 'b'"), + "Project [11b AS concat(CAST(((2 * 4) + 3) AS STRING), 
b)#x]") + checkKeywordsExistsInExplain(sql("select 3 + 1 || 'a' || 4 / 2"), + "Project [4a2.0 AS concat(concat(CAST((3 + 1) AS STRING), a), " + + "CAST((CAST(4 AS DOUBLE) / CAST(2 AS DOUBLE)) AS STRING))#x]") + checkKeywordsExistsInExplain(sql("select 1 == 1 OR 'a' || 'b' == 'ab'"), + "Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x]") + checkKeywordsExistsInExplain(sql("select 'a' || 'c' == 'ac' AND 2 == 3"), + "Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x]") + } + + test("explain for these functions; use range to avoid constant folding") { + val df = sql("select ifnull(id, 'x'), nullif(id, 'x'), nvl(id, 'x'), nvl2(id, 'x', 'y') " + + "from range(2)") + checkKeywordsExistsInExplain(df, + "Project [coalesce(cast(id#xL as string), x) AS ifnull(`id`, 'x')#x, " + + "id#xL AS nullif(`id`, 'x')#xL, coalesce(cast(id#xL as string), x) AS nvl(`id`, 'x')#x, " + + "x AS nvl2(`id`, 'x', 'y')#x]") + } } case class ExplainSingleData(id: Int) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index cf4585bf7ac6..b2515226d9a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -137,28 +137,39 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { } } + // For better test coverage, runs the tests on mixed config sets: WHOLESTAGE_CODEGEN_ENABLED + // and CODEGEN_FACTORY_MODE. + private lazy val codegenConfigSets = Array( + ("true", "CODEGEN_ONLY"), + ("false", "CODEGEN_ONLY"), + ("false", "NO_CODEGEN") + ).map { case (wholeStageCodegenEnabled, codegenFactoryMode) => + Array(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> wholeStageCodegenEnabled, + SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactoryMode) + } + /** Run a test case. */ private def runTest(testCase: TestCase): Unit = { val input = fileToString(new File(testCase.inputFile)) val (comments, code) = input.split("\n").partition(_.startsWith("--")) - // Runs all the tests on both codegen-only and interpreter modes - val codegenConfigSets = Array(CODEGEN_ONLY, NO_CODEGEN).map { - case codegenFactoryMode => - Array(SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactoryMode.toString) - } - val configSets = { - val configLines = comments.filter(_.startsWith("--SET")).map(_.substring(5)) - val configs = configLines.map(_.split(",").map { confAndValue => - val (conf, value) = confAndValue.span(_ != '=') - conf.trim -> value.substring(1).trim - }) - // When we are regenerating the golden files, we don't need to set any config as they - // all need to return the same result - if (regenerateGoldenFiles) { - Array.empty[Array[(String, String)]] - } else { + // List of SQL queries to run + // note: this is not a robust way to split queries using semicolon, but works for now. 
+ val queries = code.mkString("\n").split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq + + // When we are regenerating the golden files, we don't need to set any config as they + // all need to return the same result + if (regenerateGoldenFiles) { + runQueries(queries, testCase.resultFile, None) + } else { + val configSets = { + val configLines = comments.filter(_.startsWith("--SET")).map(_.substring(5)) + val configs = configLines.map(_.split(",").map { confAndValue => + val (conf, value) = confAndValue.span(_ != '=') + conf.trim -> value.substring(1).trim + }) + if (configs.nonEmpty) { codegenConfigSets.flatMap { codegenConfig => configs.map { config => @@ -169,15 +180,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { codegenConfigSets } } - } - // List of SQL queries to run - // note: this is not a robust way to split queries using semicolon, but works for now. - val queries = code.mkString("\n").split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq - - if (configSets.isEmpty) { - runQueries(queries, testCase.resultFile, None) - } else { configSets.foreach { configSet => try { runQueries(queries, testCase.resultFile, Some(configSet)) From 5ad03607d1487e7ab3e3b6d00eef9c4028ed4975 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 20 Dec 2018 10:47:24 +0800 Subject: [PATCH 0167/1072] [SPARK-25271][SQL] Hive ctas commands should use data source if it is convertible ## What changes were proposed in this pull request? In Spark 2.3.0 and previous versions, Hive CTAS command will convert to use data source to write data into the table when the table is convertible. This behavior is controlled by the configs like HiveUtils.CONVERT_METASTORE_ORC and HiveUtils.CONVERT_METASTORE_PARQUET. In 2.3.1, we drop this optimization by mistake in the PR [SPARK-22977](https://github.com/apache/spark/pull/20521/files#r217254430). Since that Hive CTAS command only uses Hive Serde to write data. This patch adds this optimization back to Hive CTAS command. This patch adds OptimizedCreateHiveTableAsSelectCommand which uses data source to write data. ## How was this patch tested? Added test. Closes #22514 from viirya/SPARK-25271-2. Authored-by: Liang-Chi Hsieh Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/ddl.scala | 8 ++ .../datasources/DataSourceStrategy.scala | 12 +- .../spark/sql/hive/HiveMetastoreCatalog.scala | 43 +++++- .../spark/sql/hive/HiveStrategies.scala | 62 +++----- .../org/apache/spark/sql/hive/HiveUtils.scala | 8 ++ .../CreateHiveTableAsSelectCommand.scala | 134 +++++++++++++----- .../spark/sql/hive/HiveParquetSuite.scala | 14 ++ .../sql/hive/execution/SQLQuerySuite.scala | 40 ++++++ 8 files changed, 230 insertions(+), 91 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index e1faecedd20e..096481f68275 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -820,6 +820,14 @@ object DDLUtils { table.provider.isDefined && table.provider.get.toLowerCase(Locale.ROOT) != HIVE_PROVIDER } + def readHiveTable(table: CatalogTable): HiveTableRelation = { + HiveTableRelation( + table, + // Hive table columns are always nullable. + table.dataSchema.asNullable.toAttributes, + table.partitionSchema.asNullable.toAttributes) + } + /** * Throws a standard error for actions that require partitionProvider = hive. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index b304e2da6e1c..b5cf8c9515bf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -244,27 +244,19 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] }) } - private def readHiveTable(table: CatalogTable): LogicalPlan = { - HiveTableRelation( - table, - // Hive table columns are always nullable. - table.dataSchema.asNullable.toAttributes, - table.partitionSchema.asNullable.toAttributes) - } - override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case i @ InsertIntoTable(UnresolvedCatalogRelation(tableMeta), _, _, _, _) if DDLUtils.isDatasourceTable(tableMeta) => i.copy(table = readDataSourceTable(tableMeta)) case i @ InsertIntoTable(UnresolvedCatalogRelation(tableMeta), _, _, _, _) => - i.copy(table = readHiveTable(tableMeta)) + i.copy(table = DDLUtils.readHiveTable(tableMeta)) case UnresolvedCatalogRelation(tableMeta) if DDLUtils.isDatasourceTable(tableMeta) => readDataSourceTable(tableMeta) case UnresolvedCatalogRelation(tableMeta) => - readHiveTable(tableMeta) + DDLUtils.readHiveTable(tableMeta) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 5823548a8063..03f4b8d83e35 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive +import java.util.Locale + import scala.util.control.NonFatal import com.google.common.util.concurrent.Striped @@ -29,6 +31,8 @@ import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetOptions} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.HiveCaseSensitiveInferenceMode._ import org.apache.spark.sql.types._ @@ -113,7 +117,44 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log } } - def convertToLogicalRelation( + // Return true for Apache ORC and Hive ORC-related configuration names. + // Note that Spark doesn't support configurations like `hive.merge.orcfile.stripe.level`. + private def isOrcProperty(key: String) = + key.startsWith("orc.") || key.contains(".orc.") + + private def isParquetProperty(key: String) = + key.startsWith("parquet.") || key.contains(".parquet.") + + def convert(relation: HiveTableRelation): LogicalRelation = { + val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) + + // Consider table and storage properties. For properties existing in both sides, storage + // properties will supersede table properties. 
+ if (serde.contains("parquet")) { + val options = relation.tableMeta.properties.filterKeys(isParquetProperty) ++ + relation.tableMeta.storage.properties + (ParquetOptions.MERGE_SCHEMA -> + SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING).toString) + convertToLogicalRelation(relation, options, classOf[ParquetFileFormat], "parquet") + } else { + val options = relation.tableMeta.properties.filterKeys(isOrcProperty) ++ + relation.tableMeta.storage.properties + if (SQLConf.get.getConf(SQLConf.ORC_IMPLEMENTATION) == "native") { + convertToLogicalRelation( + relation, + options, + classOf[org.apache.spark.sql.execution.datasources.orc.OrcFileFormat], + "orc") + } else { + convertToLogicalRelation( + relation, + options, + classOf[org.apache.spark.sql.hive.orc.OrcFileFormat], + "orc") + } + } + } + + private def convertToLogicalRelation( relation: HiveTableRelation, options: Map[String, String], fileFormatClass: Class[_ <: FileFormat], diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 07ee10540431..8a5ab188a949 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -31,8 +31,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoTab import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils} -import org.apache.spark.sql.execution.datasources.{CreateTable, LogicalRelation} -import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetOptions} +import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.hive.execution._ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -181,49 +180,17 @@ case class RelationConversions( conf: SQLConf, sessionCatalog: HiveSessionCatalog) extends Rule[LogicalPlan] { private def isConvertible(relation: HiveTableRelation): Boolean = { - val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) - serde.contains("parquet") && conf.getConf(HiveUtils.CONVERT_METASTORE_PARQUET) || - serde.contains("orc") && conf.getConf(HiveUtils.CONVERT_METASTORE_ORC) + isConvertible(relation.tableMeta) } - // Return true for Apache ORC and Hive ORC-related configuration names. - // Note that Spark doesn't support configurations like `hive.merge.orcfile.stripe.level`. - private def isOrcProperty(key: String) = - key.startsWith("orc.") || key.contains(".orc.") - - private def isParquetProperty(key: String) = - key.startsWith("parquet.") || key.contains(".parquet.") - - private def convert(relation: HiveTableRelation): LogicalRelation = { - val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) - - // Consider table and storage properties. For properties existing in both sides, storage - // properties will supersede table properties. 
- if (serde.contains("parquet")) { - val options = relation.tableMeta.properties.filterKeys(isParquetProperty) ++ - relation.tableMeta.storage.properties + (ParquetOptions.MERGE_SCHEMA -> - conf.getConf(HiveUtils.CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING).toString) - sessionCatalog.metastoreCatalog - .convertToLogicalRelation(relation, options, classOf[ParquetFileFormat], "parquet") - } else { - val options = relation.tableMeta.properties.filterKeys(isOrcProperty) ++ - relation.tableMeta.storage.properties - if (conf.getConf(SQLConf.ORC_IMPLEMENTATION) == "native") { - sessionCatalog.metastoreCatalog.convertToLogicalRelation( - relation, - options, - classOf[org.apache.spark.sql.execution.datasources.orc.OrcFileFormat], - "orc") - } else { - sessionCatalog.metastoreCatalog.convertToLogicalRelation( - relation, - options, - classOf[org.apache.spark.sql.hive.orc.OrcFileFormat], - "orc") - } - } + private def isConvertible(tableMeta: CatalogTable): Boolean = { + val serde = tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) + serde.contains("parquet") && SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_PARQUET) || + serde.contains("orc") && SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_ORC) } + private val metastoreCatalog = sessionCatalog.metastoreCatalog + override def apply(plan: LogicalPlan): LogicalPlan = { plan resolveOperators { // Write path @@ -231,12 +198,21 @@ case class RelationConversions( // Inserting into partitioned table is not supported in Parquet/Orc data source (yet). if query.resolved && DDLUtils.isHiveTable(r.tableMeta) && !r.isPartitioned && isConvertible(r) => - InsertIntoTable(convert(r), partition, query, overwrite, ifPartitionNotExists) + InsertIntoTable(metastoreCatalog.convert(r), partition, + query, overwrite, ifPartitionNotExists) // Read path case relation: HiveTableRelation if DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation) => - convert(relation) + metastoreCatalog.convert(relation) + + // CTAS + case CreateTable(tableDesc, mode, Some(query)) + if DDLUtils.isHiveTable(tableDesc) && tableDesc.partitionColumnNames.isEmpty && + isConvertible(tableDesc) && SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_CTAS) => + DDLUtils.checkDataColNames(tableDesc) + OptimizedCreateHiveTableAsSelectCommand( + tableDesc, query, query.output.map(_.name), mode) } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 66067704195d..b60d4c71f594 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -110,6 +110,14 @@ private[spark] object HiveUtils extends Logging { .booleanConf .createWithDefault(true) + val CONVERT_METASTORE_CTAS = buildConf("spark.sql.hive.convertMetastoreCtas") + .doc("When set to true, Spark will try to use built-in data source writer " + + "instead of Hive serde in CTAS. This flag is effective only if " + + "`spark.sql.hive.convertMetastoreParquet` or `spark.sql.hive.convertMetastoreOrc` is " + + "enabled respectively for Parquet and ORC formats") + .booleanConf + .createWithDefault(true) + val HIVE_METASTORE_SHARED_PREFIXES = buildConf("spark.sql.hive.metastore.sharedPrefixes") .doc("A comma separated list of class prefixes that should be loaded using the classloader " + "that is shared between Spark SQL and a specific version of Hive. 
An example of classes " + diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala index fd1e931ee0c7..608f21e72625 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala @@ -20,32 +20,26 @@ package org.apache.spark.sql.hive.execution import scala.util.control.NonFatal import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession} -import org.apache.spark.sql.catalyst.catalog.CatalogTable -import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.command.DataWritingCommand +import org.apache.spark.sql.execution.command.{DataWritingCommand, DDLUtils} +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InsertIntoHadoopFsRelationCommand, LogicalRelation} +import org.apache.spark.sql.hive.HiveSessionCatalog +trait CreateHiveTableAsSelectBase extends DataWritingCommand { + val tableDesc: CatalogTable + val query: LogicalPlan + val outputColumnNames: Seq[String] + val mode: SaveMode -/** - * Create table and insert the query result into it. - * - * @param tableDesc the Table Describe, which may contain serde, storage handler etc. - * @param query the query whose result will be insert into the new relation - * @param mode SaveMode - */ -case class CreateHiveTableAsSelectCommand( - tableDesc: CatalogTable, - query: LogicalPlan, - outputColumnNames: Seq[String], - mode: SaveMode) - extends DataWritingCommand { - - private val tableIdentifier = tableDesc.identifier + protected val tableIdentifier = tableDesc.identifier override def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row] = { val catalog = sparkSession.sessionState.catalog - if (catalog.tableExists(tableIdentifier)) { + val tableExists = catalog.tableExists(tableIdentifier) + + if (tableExists) { assert(mode != SaveMode.Overwrite, s"Expect the table $tableIdentifier has been dropped when the save mode is Overwrite") @@ -57,15 +51,8 @@ case class CreateHiveTableAsSelectCommand( return Seq.empty } - // For CTAS, there is no static partition values to insert. - val partition = tableDesc.partitionColumnNames.map(_ -> None).toMap - InsertIntoHiveTable( - tableDesc, - partition, - query, - overwrite = false, - ifPartitionNotExists = false, - outputColumnNames = outputColumnNames).run(sparkSession, child) + val command = getWritingCommand(catalog, tableDesc, tableExists = true) + command.run(sparkSession, child) } else { // TODO ideally, we should get the output data ready first and then // add the relation into catalog, just in case of failure occurs while data @@ -77,15 +64,8 @@ case class CreateHiveTableAsSelectCommand( try { // Read back the metadata of the table which was created just now. val createdTableMeta = catalog.getTableMetadata(tableDesc.identifier) - // For CTAS, there is no static partition values to insert. 
- val partition = createdTableMeta.partitionColumnNames.map(_ -> None).toMap - InsertIntoHiveTable( - createdTableMeta, - partition, - query, - overwrite = true, - ifPartitionNotExists = false, - outputColumnNames = outputColumnNames).run(sparkSession, child) + val command = getWritingCommand(catalog, createdTableMeta, tableExists = false) + command.run(sparkSession, child) } catch { case NonFatal(e) => // drop the created table. @@ -97,9 +77,89 @@ case class CreateHiveTableAsSelectCommand( Seq.empty[Row] } + // Returns `DataWritingCommand` which actually writes data into the table. + def getWritingCommand( + catalog: SessionCatalog, + tableDesc: CatalogTable, + tableExists: Boolean): DataWritingCommand + override def argString: String = { s"[Database:${tableDesc.database}, " + s"TableName: ${tableDesc.identifier.table}, " + s"InsertIntoHiveTable]" } } + +/** + * Create table and insert the query result into it. + * + * @param tableDesc the table description, which may contain serde, storage handler etc. + * @param query the query whose result will be insert into the new relation + * @param mode SaveMode + */ +case class CreateHiveTableAsSelectCommand( + tableDesc: CatalogTable, + query: LogicalPlan, + outputColumnNames: Seq[String], + mode: SaveMode) + extends CreateHiveTableAsSelectBase { + + override def getWritingCommand( + catalog: SessionCatalog, + tableDesc: CatalogTable, + tableExists: Boolean): DataWritingCommand = { + // For CTAS, there is no static partition values to insert. + val partition = tableDesc.partitionColumnNames.map(_ -> None).toMap + InsertIntoHiveTable( + tableDesc, + partition, + query, + overwrite = if (tableExists) false else true, + ifPartitionNotExists = false, + outputColumnNames = outputColumnNames) + } +} + +/** + * Create table and insert the query result into it. This creates Hive table but inserts + * the query result into it by using data source. + * + * @param tableDesc the table description, which may contain serde, storage handler etc. + * @param query the query whose result will be insert into the new relation + * @param mode SaveMode + */ +case class OptimizedCreateHiveTableAsSelectCommand( + tableDesc: CatalogTable, + query: LogicalPlan, + outputColumnNames: Seq[String], + mode: SaveMode) + extends CreateHiveTableAsSelectBase { + + override def getWritingCommand( + catalog: SessionCatalog, + tableDesc: CatalogTable, + tableExists: Boolean): DataWritingCommand = { + val metastoreCatalog = catalog.asInstanceOf[HiveSessionCatalog].metastoreCatalog + val hiveTable = DDLUtils.readHiveTable(tableDesc) + + val hadoopRelation = metastoreCatalog.convert(hiveTable) match { + case LogicalRelation(t: HadoopFsRelation, _, _, _) => t + case _ => throw new AnalysisException(s"$tableIdentifier should be converted to " + + "HadoopFsRelation.") + } + + InsertIntoHadoopFsRelationCommand( + hadoopRelation.location.rootPaths.head, + Map.empty, // We don't support to convert partitioned table. + false, + Seq.empty, // We don't support to convert partitioned table. 
+ hadoopRelation.bucketSpec, + hadoopRelation.fileFormat, + hadoopRelation.options, + query, + if (tableExists) mode else SaveMode.Overwrite, + Some(tableDesc), + Some(hadoopRelation.location), + query.output.map(_.name)) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala index e5c9df05d567..470c6a342b4d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala @@ -92,4 +92,18 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton } } } + + test("SPARK-25271: write empty map into hive parquet table") { + import testImplicits._ + + Seq(Map(1 -> "a"), Map.empty[Int, String]).toDF("m").createOrReplaceTempView("p") + withTempView("p") { + val targetTable = "targetTable" + withTable(targetTable) { + sql(s"CREATE TABLE $targetTable STORED AS PARQUET AS SELECT m FROM p") + checkAnswer(sql(s"SELECT m FROM $targetTable"), + Row(Map(1 -> "a")) :: Row(Map.empty[Int, String]) :: Nil) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index fab2a27cdef1..6acf44606cbb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2276,6 +2276,46 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } + test("SPARK-25271: Hive ctas commands should use data source if it is convertible") { + withTempView("p") { + Seq(1, 2, 3).toDF("id").createOrReplaceTempView("p") + + Seq("orc", "parquet").foreach { format => + Seq(true, false).foreach { isConverted => + withSQLConf( + HiveUtils.CONVERT_METASTORE_ORC.key -> s"$isConverted", + HiveUtils.CONVERT_METASTORE_PARQUET.key -> s"$isConverted") { + Seq(true, false).foreach { isConvertedCtas => + withSQLConf(HiveUtils.CONVERT_METASTORE_CTAS.key -> s"$isConvertedCtas") { + + val targetTable = "targetTable" + withTable(targetTable) { + val df = sql(s"CREATE TABLE $targetTable STORED AS $format AS SELECT id FROM p") + checkAnswer(sql(s"SELECT id FROM $targetTable"), + Row(1) :: Row(2) :: Row(3) :: Nil) + + val ctasDSCommand = df.queryExecution.analyzed.collect { + case _: OptimizedCreateHiveTableAsSelectCommand => true + }.headOption + val ctasCommand = df.queryExecution.analyzed.collect { + case _: CreateHiveTableAsSelectCommand => true + }.headOption + + if (isConverted && isConvertedCtas) { + assert(ctasDSCommand.nonEmpty) + assert(ctasCommand.isEmpty) + } else { + assert(ctasDSCommand.isEmpty) + assert(ctasCommand.nonEmpty) + } + } + } + } + } + } + } + } + } test("SPARK-26181 hasMinMaxStats method of ColumnStatsMap is not correct") { withSQLConf(SQLConf.CBO_ENABLED.key -> "true") { From 04d8e3a33c6bb08b2891ca52613cd5ccd24a69dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=BA=AE?= Date: Thu, 20 Dec 2018 13:22:12 +0800 Subject: [PATCH 0168/1072] [SPARK-26318][SQL] Deprecate Row.merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? Deprecate Row.merge ## How was this patch tested? N/A Closes #23271 from KyleLi1985/master. 
Authored-by: 李亮 Signed-off-by: Hyukjin Kwon --- sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index e12bf9616e2d..4f5af9ac80b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -57,6 +57,7 @@ object Row { /** * Merge multiple rows into a single row, one after another. */ + @deprecated("This method is deprecated and will be removed in future versions.", "3.0.0") def merge(rows: Row*): Row = { // TODO: Improve the performance of this if used in performance critical part. new GenericRow(rows.flatMap(_.toSeq).toArray) From 98c0ca78610ccf62784081353584717c62285485 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Thu, 20 Dec 2018 14:17:44 +0800 Subject: [PATCH 0169/1072] [SPARK-26308][SQL] Avoid cast of decimals for ScalaUDF ## What changes were proposed in this pull request? Currently, when we infer the schema for Scala/Java decimals, we return as data type the `SYSTEM_DEFAULT` implementation, i.e. the decimal type with precision 38 and scale 18. But this is not right, as we know nothing about the right precision and scale, and these values may not be enough to store the data. This problem arises in particular with UDFs, where we cast all inputs of type `DecimalType` to a `DecimalType(38, 18)`: in case this is not enough, null is returned as input for the UDF. The PR defines a custom handling for casting to the expected data types for ScalaUDF: the decimal precision and scale are picked from the input, so no casting to a different and possibly wrong precision and scale happens. ## How was this patch tested? Added UTs. Closes #23308 from mgaido91/SPARK-26308. Authored-by: Marco Gaido Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/TypeCoercion.scala | 31 ++++++++++++++++++ .../sql/catalyst/expressions/ScalaUDF.scala | 2 +- .../scala/org/apache/spark/sql/UDFSuite.scala | 32 ++++++++++++++++++- 3 files changed, 63 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 133fa119b7aa..1706b3eece6d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -879,6 +879,37 @@ object TypeCoercion { } } e.withNewChildren(children) + + case udf: ScalaUDF if udf.inputTypes.nonEmpty => + val children = udf.children.zip(udf.inputTypes).map { case (in, expected) => + implicitCast(in, udfInputToCastType(in.dataType, expected)).getOrElse(in) + } + udf.withNewChildren(children) + } + + private def udfInputToCastType(input: DataType, expectedType: DataType): DataType = { + (input, expectedType) match { + // SPARK-26308: avoid casting to an arbitrary precision and scale for decimals. Please note + // that precision and scale cannot be inferred properly for a ScalaUDF because, when it is + // created, it is not bound to any column. So here the precision and scale of the input + // column is used. 
+ case (in: DecimalType, _: DecimalType) => in + case (ArrayType(dtIn, _), ArrayType(dtExp, nullableExp)) => + ArrayType(udfInputToCastType(dtIn, dtExp), nullableExp) + case (MapType(keyDtIn, valueDtIn, _), MapType(keyDtExp, valueDtExp, nullableExp)) => + MapType(udfInputToCastType(keyDtIn, keyDtExp), + udfInputToCastType(valueDtIn, valueDtExp), + nullableExp) + case (StructType(fieldsIn), StructType(fieldsExp)) => + val fieldTypes = + fieldsIn.map(_.dataType).zip(fieldsExp.map(_.dataType)).map { case (dtIn, dtExp) => + udfInputToCastType(dtIn, dtExp) + } + StructType(fieldsExp.zip(fieldTypes).map { case (field, newDt) => + field.copy(dataType = newDt) + }) + case (_, other) => other + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index fae90caebf96..a23aaa3a0b3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -52,7 +52,7 @@ case class ScalaUDF( udfName: Option[String] = None, nullable: Boolean = true, udfDeterministic: Boolean = true) - extends Expression with ImplicitCastInputTypes with NonSQLExpression with UserDefinedExpression { + extends Expression with NonSQLExpression with UserDefinedExpression { // The constructor for SPARK 2.1 and 2.2 def this( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index 20dcefa7e3ca..a26d306cff6b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql +import java.math.BigDecimal + import org.apache.spark.sql.api.java._ import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.QueryExecution @@ -26,7 +28,7 @@ import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationComm import org.apache.spark.sql.functions.{lit, udf} import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.test.SQLTestData._ -import org.apache.spark.sql.types.{DataTypes, DoubleType} +import org.apache.spark.sql.types._ import org.apache.spark.sql.util.QueryExecutionListener @@ -420,4 +422,32 @@ class UDFSuite extends QueryTest with SharedSQLContext { checkAnswer(df, Seq(Row("null1x"), Row(null), Row("N3null"))) } } + + test("SPARK-26308: udf with decimal") { + val df1 = spark.createDataFrame( + sparkContext.parallelize(Seq(Row(new BigDecimal("2011000000000002456556")))), + StructType(Seq(StructField("col1", DecimalType(30, 0))))) + val udf1 = org.apache.spark.sql.functions.udf((value: BigDecimal) => { + if (value == null) null else value.toBigInteger.toString + }) + checkAnswer(df1.select(udf1(df1.col("col1"))), Seq(Row("2011000000000002456556"))) + } + + test("SPARK-26308: udf with complex types of decimal") { + val df1 = spark.createDataFrame( + sparkContext.parallelize(Seq(Row(Array(new BigDecimal("2011000000000002456556"))))), + StructType(Seq(StructField("col1", ArrayType(DecimalType(30, 0)))))) + val udf1 = org.apache.spark.sql.functions.udf((arr: Seq[BigDecimal]) => { + arr.map(value => if (value == null) null else value.toBigInteger.toString) + }) + checkAnswer(df1.select(udf1($"col1")), Seq(Row(Array("2011000000000002456556")))) + + val df2 = spark.createDataFrame( + 
sparkContext.parallelize(Seq(Row(Map("a" -> new BigDecimal("2011000000000002456556"))))), + StructType(Seq(StructField("col1", MapType(StringType, DecimalType(30, 0)))))) + val udf2 = org.apache.spark.sql.functions.udf((map: Map[String, BigDecimal]) => { + map.mapValues(value => if (value == null) null else value.toBigInteger.toString) + }) + checkAnswer(df2.select(udf2($"col1")), Seq(Row(Map("a" -> "2011000000000002456556")))) + } } From 7c8f4756c34a0b00931c2987c827a18d989e6c08 Mon Sep 17 00:00:00 2001 From: zhoukang Date: Thu, 20 Dec 2018 08:26:25 -0600 Subject: [PATCH 0170/1072] [SPARK-24687][CORE] Avoid job hanging when generate task binary causes fatal error ## What changes were proposed in this pull request? When NoClassDefFoundError thrown,it will cause job hang. `Exception in thread "dag-scheduler-event-loop" java.lang.NoClassDefFoundError: Lcom/xxx/data/recommend/aggregator/queue/QueueName; at java.lang.Class.getDeclaredFields0(Native Method) at java.lang.Class.privateGetDeclaredFields(Class.java:2436) at java.lang.Class.getDeclaredField(Class.java:1946) at java.io.ObjectStreamClass.getDeclaredSUID(ObjectStreamClass.java:1659) at java.io.ObjectStreamClass.access$700(ObjectStreamClass.java:72) at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:480) at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:468) at java.security.AccessController.doPrivileged(Native Method) at java.io.ObjectStreamClass.(ObjectStreamClass.java:468) at java.io.ObjectStreamClass.lookup(ObjectStreamClass.java:365) at java.io.ObjectOutputStream.writeClass(ObjectOutputStream.java:1212) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1119) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1377) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1173) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at 
java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1377)` This is caused by the NoClassDefFoundError not being caught during task serialization. `var taskBinary: Broadcast[Array[Byte]] = null try { // For ShuffleMapTask, serialize and broadcast (rdd, shuffleDep). // For ResultTask, serialize and broadcast (rdd, func). val taskBinaryBytes: Array[Byte] = stage match { case stage: ShuffleMapStage => JavaUtils.bufferToArray( closureSerializer.serialize((stage.rdd, stage.shuffleDep): AnyRef)) case stage: ResultStage => JavaUtils.bufferToArray(closureSerializer.serialize((stage.rdd, stage.func): AnyRef)) } taskBinary = sc.broadcast(taskBinaryBytes) } catch { // In the case of a failure during serialization, abort the stage. case e: NotSerializableException => abortStage(stage, "Task not serializable: " + e.toString, Some(e)) runningStages -= stage // Abort execution return case NonFatal(e) => abortStage(stage, s"Task serialization failed: $e\n${Utils.exceptionString(e)}", Some(e)) runningStages -= stage return }` The screenshots below (taken 2018-06-28) show that stage 33 is blocked and never gets scheduled. ## How was this patch tested? UT Closes #21664 from caneGuy/zhoukang/fix-noclassdeferror. Authored-by: zhoukang Signed-off-by: Sean Owen --- .../main/scala/org/apache/spark/scheduler/DAGScheduler.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 06966e77db81..6f4c326442e1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1170,9 +1170,11 @@ private[spark] class DAGScheduler( // Abort execution return - case NonFatal(e) => + case e: Throwable => abortStage(stage, s"Task serialization failed: $e\n${Utils.exceptionString(e)}", Some(e)) runningStages -= stage + + // Abort execution return } From a888d202ab719ccb13b328aa445341d1d6b06881 Mon Sep 17 00:00:00 2001 From: Jorge Machado Date: Thu, 20 Dec 2018 08:29:51 -0600 Subject: [PATCH 0171/1072] [SPARK-26324][DOCS] Add Spark docs for Running in Mesos with SSL ## What changes were proposed in this pull request? Added docs for running Spark jobs with Mesos over SSL. Closes #23342 from jomach/master. Lead-authored-by: Jorge Machado Co-authored-by: Jorge Machado Co-authored-by: Jorge Machado Co-authored-by: Jorge Machado Signed-off-by: Sean Owen --- docs/running-on-mesos.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 968d668e2c93..a07773c1c71e 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -108,6 +108,19 @@ Please note that if you specify multiple ways to obtain the credentials then the An equivalent order applies for the secret. Essentially we prefer the configuration to be specified directly rather than indirectly by files, and we prefer that configuration settings are used over environment variables. +### Deploy to a Mesos running on Secure Sockets + +If you want to deploy a Spark Application into a Mesos cluster that is running in a secure mode there are some environment variables that need to be set. 
+ +- `LIBPROCESS_SSL_ENABLED=true` enables SSL communication +- `LIBPROCESS_SSL_VERIFY_CERT=false` whether to verify the SSL certificate (`false` disables verification) +- `LIBPROCESS_SSL_KEY_FILE=pathToKeyFile.key` path to the key file +- `LIBPROCESS_SSL_CERT_FILE=pathToCRTFile.crt` the certificate file to be used + +All options can be found at http://mesos.apache.org/documentation/latest/ssl/ + +Then submit your application as described in Client mode or Cluster mode below. + ## Uploading Spark Package When Mesos runs a task on a Mesos slave for the first time, that slave must have a Spark binary From 6692bacf3e74e7a17d8e676e8a06ab198f85d328 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 20 Dec 2018 10:05:56 -0800 Subject: [PATCH 0172/1072] [SPARK-26409][SQL][TESTS] SQLConf should be serializable in test sessions ## What changes were proposed in this pull request? `SQLConf` is supposed to be serializable. However, currently it is not serializable in `WithTestConf`. `WithTestConf` uses the method `overrideConfs` in a closure, while the classes which implement it (`TestHiveSessionStateBuilder` and `TestSQLSessionStateBuilder`) are not serializable. This PR is to use a local variable to fix it. ## How was this patch tested? Add unit test. Closes #23352 from gengliangwang/serializableSQLConf. Authored-by: Gengliang Wang Signed-off-by: gatorsmile --- .../apache/spark/sql/internal/BaseSessionStateBuilder.scala | 3 ++- .../test/scala/org/apache/spark/sql/SerializationSuite.scala | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index ac07e1f6bb4f..319c2649592f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -309,13 +309,14 @@ private[sql] trait WithTestConf { self: BaseSessionStateBuilder => def overrideConfs: Map[String, String] override protected lazy val conf: SQLConf = { + val overrideConfigurations = overrideConfs val conf = parentState.map(_.conf.clone()).getOrElse { new SQLConf { clear() override def clear(): Unit = { super.clear() // Make sure we start with the default test configs even after clear - overrideConfs.foreach { case (key, value) => setConfString(key, value) } + overrideConfigurations.foreach { case (key, value) => setConfString(key, value) } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SerializationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SerializationSuite.scala index cd6b2647e0be..1a1c956aed3d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SerializationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SerializationSuite.scala @@ -27,4 +27,9 @@ class SerializationSuite extends SparkFunSuite with SharedSQLContext { val spark = SparkSession.builder.getOrCreate() new JavaSerializer(new SparkConf()).newInstance().serialize(spark.sqlContext) } + + test("[SPARK-26409] SQLConf should be serializable") { + val spark = SparkSession.builder.getOrCreate() + new JavaSerializer(new SparkConf()).newInstance().serialize(spark.sessionState.conf) + } } From 3d6b44d9ea92dc1eabb8f211176861e51240bf93 Mon Sep 17 00:00:00 2001 From: Ngone51 Date: Thu, 20 Dec 2018 10:25:52 -0800 Subject: [PATCH 0173/1072] [SPARK-26392][YARN] Cancel pending allocate requests by taking locality preference into account ## What changes were proposed in this pull request?
Right now, we cancel pending allocate requests in their sending order. I think we can take locality preference into account when doing this, to minimize the impact on task locality preference. ## How was this patch tested? N.A. Closes #23344 from Ngone51/dev-cancel-pending-allocate-requests-by-taking-locality-preference-into-account. Authored-by: Ngone51 Signed-off-by: Marcelo Vanzin --- .../spark/deploy/yarn/YarnAllocator.scala | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index d37d0d66d8ae..54b1ec266113 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -294,6 +294,15 @@ private[yarn] class YarnAllocator( s"pending: $numPendingAllocate, running: ${runningExecutors.size}, " + s"executorsStarting: ${numExecutorsStarting.get}") + // Split the pending container request into three groups: locality matched list, locality + // unmatched list and non-locality list. Take the locality matched container request into + // consideration of container placement, treat as allocated containers. + // For locality unmatched and locality free container requests, cancel these container + // requests, since required locality preference has been changed, recalculating using + // container placement strategy. + val (localRequests, staleRequests, anyHostRequests) = splitPendingAllocationsByLocality( + hostToLocalTaskCounts, pendingAllocate) + if (missing > 0) { if (log.isInfoEnabled()) { var requestContainerMessage = s"Will request $missing executor container(s), each with " + @@ -306,15 +315,6 @@ private[yarn] class YarnAllocator( logInfo(requestContainerMessage) } - // Split the pending container request into three groups: locality matched list, locality - // unmatched list and non-locality list. Take the locality matched container request into - // consideration of container placement, treat as allocated containers. - // For locality unmatched and locality free container requests, cancel these container - // requests, since required locality preference has been changed, recalculating using - // container placement strategy. 
- val (localRequests, staleRequests, anyHostRequests) = splitPendingAllocationsByLocality( - hostToLocalTaskCounts, pendingAllocate) - // cancel "stale" requests for locations that are no longer needed staleRequests.foreach { stale => amClient.removeContainerRequest(stale) @@ -374,14 +374,9 @@ private[yarn] class YarnAllocator( val numToCancel = math.min(numPendingAllocate, -missing) logInfo(s"Canceling requests for $numToCancel executor container(s) to have a new desired " + s"total $targetNumExecutors executors.") - - val matchingRequests = amClient.getMatchingRequests(RM_REQUEST_PRIORITY, ANY_HOST, resource) - if (!matchingRequests.isEmpty) { - matchingRequests.iterator().next().asScala - .take(numToCancel).foreach(amClient.removeContainerRequest) - } else { - logWarning("Expected to find pending requests, but found none.") - } + // cancel pending allocate requests by taking locality preference into account + val cancelRequests = (staleRequests ++ anyHostRequests ++ localRequests).take(numToCancel) + cancelRequests.foreach(amClient.removeContainerRequest) } } From aa0d4ca8bab08a467645080a5b8a28bf6dd8a042 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Thu, 20 Dec 2018 11:22:49 -0800 Subject: [PATCH 0174/1072] [SPARK-25970][ML] Add Instrumentation to PrefixSpan ## What changes were proposed in this pull request? Add Instrumentation to PrefixSpan ## How was this patch tested? existing tests Closes #22971 from zhengruifeng/log_PrefixSpan. Authored-by: zhengruifeng Signed-off-by: Xiangrui Meng --- .../src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala index 2a3413553a6a..b0006a8d4a58 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala @@ -20,6 +20,7 @@ package org.apache.spark.ml.fpm import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.param._ import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.util.Instrumentation.instrumented import org.apache.spark.mllib.fpm.{PrefixSpan => mllibPrefixSpan} import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.sql.functions.col @@ -135,7 +136,10 @@ final class PrefixSpan(@Since("2.4.0") override val uid: String) extends Params * - `freq: Long` */ @Since("2.4.0") - def findFrequentSequentialPatterns(dataset: Dataset[_]): DataFrame = { + def findFrequentSequentialPatterns(dataset: Dataset[_]): DataFrame = instrumented { instr => + instr.logDataset(dataset) + instr.logParams(this, params: _*) + val sequenceColParam = $(sequenceCol) val inputType = dataset.schema(sequenceColParam).dataType require(inputType.isInstanceOf[ArrayType] && From 98ecda3e8ef9db5e21b5b9605df09d1653094b9c Mon Sep 17 00:00:00 2001 From: liuxian Date: Fri, 21 Dec 2018 13:01:14 +0800 Subject: [PATCH 0175/1072] [MINOR][SQL] Locality does not need to be implemented ## What changes were proposed in this pull request? `HadoopFileWholeTextReader` and `HadoopFileLinesReader` will be eventually called in `FileSourceScanExec`. In fact, locality has been implemented in `FileScanRDD`, even if we implement it in `HadoopFileWholeTextReader ` and `HadoopFileLinesReader`, it would be useless. So I think these `TODO` can be removed. ## How was this patch tested? N/A Closes #23339 from 10110346/noneededtodo. 
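To make the point about where locality is decided concrete, here is a minimal, illustrative sketch of an RDD that reports preferred locations only through `getPreferredLocations`; the partition class, host lists, and values are made up for illustration and this is not Spark's actual `FileScanRDD` code:

```scala
import org.apache.spark.{Partition, SparkContext, TaskContext}
import org.apache.spark.rdd.RDD

// Hypothetical partition type carrying the hosts that hold its data.
case class HostAwarePartition(index: Int, hosts: Seq[String], values: Seq[Int]) extends Partition

// A toy RDD: locality is expressed only via getPreferredLocations, so the
// per-partition reader (compute) does not need to know about it at all,
// which is why the readers above do not need their own locality handling.
class HostAwareRDD(sc: SparkContext, parts: Seq[HostAwarePartition]) extends RDD[Int](sc, Nil) {

  override protected def getPartitions: Array[Partition] = parts.toArray

  // The scheduler consults this method to place tasks close to their data;
  // FileScanRDD plays this role for file-based scans.
  override protected def getPreferredLocations(split: Partition): Seq[String] =
    split.asInstanceOf[HostAwarePartition].hosts

  override def compute(split: Partition, context: TaskContext): Iterator[Int] =
    split.asInstanceOf[HostAwarePartition].values.iterator
}
```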
Authored-by: liuxian Signed-off-by: Wenchen Fan --- .../spark/sql/execution/datasources/HadoopFileLinesReader.scala | 2 +- .../sql/execution/datasources/HadoopFileWholeTextReader.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileLinesReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileLinesReader.scala index 00a78f7343c5..57082b40e113 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileLinesReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileLinesReader.scala @@ -51,7 +51,7 @@ class HadoopFileLinesReader( new Path(new URI(file.filePath)), file.start, file.length, - // TODO: Implement Locality + // The locality is decided by `getPreferredLocations` in `FileScanRDD`. Array.empty) val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileWholeTextReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileWholeTextReader.scala index c61a89e6e8c3..f5724f7c5955 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileWholeTextReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileWholeTextReader.scala @@ -40,7 +40,7 @@ class HadoopFileWholeTextReader(file: PartitionedFile, conf: Configuration) Array(new Path(new URI(file.filePath))), Array(file.start), Array(file.length), - // TODO: Implement Locality + // The locality is decided by `getPreferredLocations` in `FileScanRDD`. Array.empty[String]) val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId) From 305e9b5ad22b428501fd42d3730d73d2e09ad4c5 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 21 Dec 2018 16:09:30 +0800 Subject: [PATCH 0176/1072] [SPARK-26422][R] Support to disable Hive support in SparkR even for Hadoop versions unsupported by Hive fork ## What changes were proposed in this pull request? Currently, even if I explicitly disable Hive support in SparkR session as below: ```r sparkSession <- sparkR.session("local[4]", "SparkR", Sys.getenv("SPARK_HOME"), enableHiveSupport = FALSE) ``` produces when the Hadoop version is not supported by our Hive fork: ``` java.lang.reflect.InvocationTargetException ... Caused by: java.lang.IllegalArgumentException: Unrecognized Hadoop major version number: 3.1.1.3.1.0.0-78 at org.apache.hadoop.hive.shims.ShimLoader.getMajorVersion(ShimLoader.java:174) at org.apache.hadoop.hive.shims.ShimLoader.loadShims(ShimLoader.java:139) at org.apache.hadoop.hive.shims.ShimLoader.getHadoopShims(ShimLoader.java:100) at org.apache.hadoop.hive.conf.HiveConf$ConfVars.(HiveConf.java:368) ... 
43 more Error in handleErrors(returnStatus, conn) : java.lang.ExceptionInInitializerError at org.apache.hadoop.hive.conf.HiveConf.(HiveConf.java:105) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:348) at org.apache.spark.util.Utils$.classForName(Utils.scala:193) at org.apache.spark.sql.SparkSession$.hiveClassesArePresent(SparkSession.scala:1116) at org.apache.spark.sql.api.r.SQLUtils$.getOrCreateSparkSession(SQLUtils.scala:52) at org.apache.spark.sql.api.r.SQLUtils.getOrCreateSparkSession(SQLUtils.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ``` The root cause is that: ``` SparkSession.hiveClassesArePresent ``` checks whether the class is loadable (i.e. whether it is on the classpath), but `org.apache.hadoop.hive.conf.HiveConf` has a static Hadoop-version check that is executed right away. This throws an `IllegalArgumentException` and that's not caught: https://github.com/apache/spark/blob/36edbac1c8337a4719f90e4abd58d38738b2e1fb/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala#L1113-L1121 So, currently, if users have a Hive-enabled Spark build with a Hadoop version unsupported by our Hive fork (namely 3+), there's no way to use SparkR even though it could work. This PR just proposes to change the order of the boolean comparison so that we don't execute `SparkSession.hiveClassesArePresent` when: 1. `enableHiveSupport` is explicitly disabled 2. `spark.sql.catalogImplementation` is `in-memory` so that we **only** check `SparkSession.hiveClassesArePresent` when Hive support is explicitly enabled, by short-circuiting. ## How was this patch tested? It's difficult to write a test since we don't run tests against Hadoop 3 yet. See https://github.com/apache/spark/pull/21588. Manually tested. Closes #23356 from HyukjinKwon/SPARK-26422. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .../scala/org/apache/spark/sql/api/r/SQLUtils.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index becb05cf72ab..e98cab8b56d1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -49,9 +49,17 @@ private[sql] object SQLUtils extends Logging { sparkConfigMap: JMap[Object, Object], enableHiveSupport: Boolean): SparkSession = { val spark = - if (SparkSession.hiveClassesArePresent && enableHiveSupport && + if (enableHiveSupport && jsc.sc.conf.get(CATALOG_IMPLEMENTATION.key, "hive").toLowerCase(Locale.ROOT) == - "hive") { + "hive" && + // Note that the order of conditions here are on purpose. + // `SparkSession.hiveClassesArePresent` checks if Hive's `HiveConf` is loadable or not; + // however, `HiveConf` itself has some static logic to check if Hadoop version is + // supported or not, which throws an `IllegalArgumentException` if unsupported. + // If this is checked first, there's no way to disable Hive support in the case above. + // So, we intentionally check if Hive classes are loadable or not only when + // Hive support is explicitly enabled by short-circuiting. See also SPARK-26422. 
+ SparkSession.hiveClassesArePresent) { SparkSession.builder().sparkContext(withHiveExternalCatalog(jsc.sc)).getOrCreate() } else { if (enableHiveSupport) { From 8e76d6621aaddb8b73443b14ea2c6eebe9089893 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Fri, 21 Dec 2018 10:41:25 -0800 Subject: [PATCH 0177/1072] [SPARK-26267][SS] Retry when detecting incorrect offsets from Kafka ## What changes were proposed in this pull request? Due to [KAFKA-7703](https://issues.apache.org/jira/browse/KAFKA-7703), Kafka may return an earliest offset when we are request a latest offset. This will cause Spark to reprocess data. As per suggestion in KAFKA-7703, we put a position call between poll and seekToEnd to block the fetch request triggered by `poll` before calling `seekToEnd`. In addition, to avoid other unknown issues, we also use the previous known offsets to audit the latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. ## How was this patch tested? Jenkins Closes #23324 from zsxwing/SPARK-26267. Authored-by: Shixiong Zhu Signed-off-by: Shixiong Zhu --- .../kafka010/KafkaContinuousReadSupport.scala | 4 +- .../kafka010/KafkaMicroBatchReadSupport.scala | 19 ++++- .../kafka010/KafkaOffsetRangeCalculator.scala | 2 + .../sql/kafka010/KafkaOffsetReader.scala | 80 +++++++++++++++++-- .../spark/sql/kafka010/KafkaSource.scala | 5 +- .../kafka010/KafkaMicroBatchSourceSuite.scala | 48 +++++++++++ 6 files changed, 145 insertions(+), 13 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousReadSupport.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousReadSupport.scala index 1753a28fba2f..02dfb9ca2b95 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousReadSupport.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousReadSupport.scala @@ -60,7 +60,7 @@ class KafkaContinuousReadSupport( override def initialOffset(): Offset = { val offsets = initialOffsets match { case EarliestOffsetRangeLimit => KafkaSourceOffset(offsetReader.fetchEarliestOffsets()) - case LatestOffsetRangeLimit => KafkaSourceOffset(offsetReader.fetchLatestOffsets()) + case LatestOffsetRangeLimit => KafkaSourceOffset(offsetReader.fetchLatestOffsets(None)) case SpecificOffsetRangeLimit(p) => offsetReader.fetchSpecificOffsets(p, reportDataLoss) } logInfo(s"Initial offsets: $offsets") @@ -107,7 +107,7 @@ class KafkaContinuousReadSupport( override def needsReconfiguration(config: ScanConfig): Boolean = { val knownPartitions = config.asInstanceOf[KafkaContinuousScanConfig].knownPartitions - offsetReader.fetchLatestOffsets().keySet != knownPartitions + offsetReader.fetchLatestOffsets(None).keySet != knownPartitions } override def toString(): String = s"KafkaSource[$offsetReader]" diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchReadSupport.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchReadSupport.scala index bb4de674c3c7..b4f042e93a5d 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchReadSupport.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchReadSupport.scala @@ -84,7 +84,7 @@ private[kafka010] class KafkaMicroBatchReadSupport( override def 
latestOffset(start: Offset): Offset = { val startPartitionOffsets = start.asInstanceOf[KafkaSourceOffset].partitionToOffsets - val latestPartitionOffsets = kafkaOffsetReader.fetchLatestOffsets() + val latestPartitionOffsets = kafkaOffsetReader.fetchLatestOffsets(Some(startPartitionOffsets)) endPartitionOffsets = KafkaSourceOffset(maxOffsetsPerTrigger.map { maxOffsets => rateLimit(maxOffsets, startPartitionOffsets, latestPartitionOffsets) }.getOrElse { @@ -133,10 +133,21 @@ private[kafka010] class KafkaMicroBatchReadSupport( }.toSeq logDebug("TopicPartitions: " + topicPartitions.mkString(", ")) + val fromOffsets = startPartitionOffsets ++ newPartitionInitialOffsets + val untilOffsets = endPartitionOffsets + untilOffsets.foreach { case (tp, untilOffset) => + fromOffsets.get(tp).foreach { fromOffset => + if (untilOffset < fromOffset) { + reportDataLoss(s"Partition $tp's offset was changed from " + + s"$fromOffset to $untilOffset, some data may have been missed") + } + } + } + // Calculate offset ranges val offsetRanges = rangeCalculator.getRanges( - fromOffsets = startPartitionOffsets ++ newPartitionInitialOffsets, - untilOffsets = endPartitionOffsets, + fromOffsets = fromOffsets, + untilOffsets = untilOffsets, executorLocations = getSortedExecutorList()) // Reuse Kafka consumers only when all the offset ranges have distinct TopicPartitions, @@ -186,7 +197,7 @@ private[kafka010] class KafkaMicroBatchReadSupport( case EarliestOffsetRangeLimit => KafkaSourceOffset(kafkaOffsetReader.fetchEarliestOffsets()) case LatestOffsetRangeLimit => - KafkaSourceOffset(kafkaOffsetReader.fetchLatestOffsets()) + KafkaSourceOffset(kafkaOffsetReader.fetchLatestOffsets(None)) case SpecificOffsetRangeLimit(p) => kafkaOffsetReader.fetchSpecificOffsets(p, reportDataLoss) } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala index fb209c724afb..600879492405 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala @@ -37,6 +37,8 @@ private[kafka010] class KafkaOffsetRangeCalculator(val minPartitions: Option[Int * the read tasks of the skewed partitions to multiple Spark tasks. * The number of Spark tasks will be *approximately* `numPartitions`. It can be less or more * depending on rounding errors or Kafka partitions that didn't receive any new data. + * + * Empty ranges (`KafkaOffsetRange.size <= 0`) will be dropped. 
*/ def getRanges( fromOffsets: PartitionOffsetMap, diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala index 82066697cb95..fc443d22bf5a 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala @@ -21,6 +21,7 @@ import java.{util => ju} import java.util.concurrent.{Executors, ThreadFactory} import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration import scala.util.control.NonFatal @@ -137,6 +138,12 @@ private[kafka010] class KafkaOffsetReader( // Poll to get the latest assigned partitions consumer.poll(0) val partitions = consumer.assignment() + + // Call `position` to wait until the potential offset request triggered by `poll(0)` is + // done. This is a workaround for KAFKA-7703, which an async `seekToBeginning` triggered by + // `poll(0)` may reset offsets that should have been set by another request. + partitions.asScala.map(p => p -> consumer.position(p)).foreach(_ => {}) + consumer.pause(partitions) assert(partitions.asScala == partitionOffsets.keySet, "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + @@ -192,19 +199,82 @@ private[kafka010] class KafkaOffsetReader( /** * Fetch the latest offsets for the topic partitions that are indicated * in the [[ConsumerStrategy]]. + * + * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called + * right before `seekToEnd` (KAFKA-7703). As a workaround, we will call `position` right after + * `poll` to wait until the potential offset request triggered by `poll(0)` is done. + * + * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the + * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less + * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When + * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot + * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. */ - def fetchLatestOffsets(): Map[TopicPartition, Long] = runUninterruptibly { + def fetchLatestOffsets( + knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = runUninterruptibly { withRetriesWithoutInterrupt { // Poll to get the latest assigned partitions consumer.poll(0) val partitions = consumer.assignment() + + // Call `position` to wait until the potential offset request triggered by `poll(0)` is + // done. This is a workaround for KAFKA-7703, which an async `seekToBeginning` triggered by + // `poll(0)` may reset offsets that should have been set by another request. + partitions.asScala.map(p => p -> consumer.position(p)).foreach(_ => {}) + consumer.pause(partitions) logDebug(s"Partitions assigned to consumer: $partitions. 
Seeking to the end.") - consumer.seekToEnd(partitions) - val partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap - logDebug(s"Got latest offsets for partition : $partitionOffsets") - partitionOffsets + if (knownOffsets.isEmpty) { + consumer.seekToEnd(partitions) + partitions.asScala.map(p => p -> consumer.position(p)).toMap + } else { + var partitionOffsets: PartitionOffsetMap = Map.empty + + /** + * Compare `knownOffsets` and `partitionOffsets`. Returns all partitions that have incorrect + * latest offset (offset in `knownOffsets` is great than the one in `partitionOffsets`). + */ + def findIncorrectOffsets(): Seq[(TopicPartition, Long, Long)] = { + var incorrectOffsets = ArrayBuffer[(TopicPartition, Long, Long)]() + partitionOffsets.foreach { case (tp, offset) => + knownOffsets.foreach(_.get(tp).foreach { knownOffset => + if (knownOffset > offset) { + val incorrectOffset = (tp, knownOffset, offset) + incorrectOffsets += incorrectOffset + } + }) + } + incorrectOffsets + } + + // Retry to fetch latest offsets when detecting incorrect offsets. We don't use + // `withRetriesWithoutInterrupt` to retry because: + // + // - `withRetriesWithoutInterrupt` will reset the consumer for each attempt but a fresh + // consumer has a much bigger chance to hit KAFKA-7703. + // - Avoid calling `consumer.poll(0)` which may cause KAFKA-7703. + var incorrectOffsets: Seq[(TopicPartition, Long, Long)] = Nil + var attempt = 0 + do { + consumer.seekToEnd(partitions) + partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap + attempt += 1 + + incorrectOffsets = findIncorrectOffsets() + if (incorrectOffsets.nonEmpty) { + logWarning("Found incorrect offsets in some partitions " + + s"(partition, previous offset, fetched offset): $incorrectOffsets") + if (attempt < maxOffsetFetchAttempts) { + logWarning("Retrying to fetch latest offsets because of incorrect offsets") + Thread.sleep(offsetFetchAttemptIntervalMs) + } + } + } while (incorrectOffsets.nonEmpty && attempt < maxOffsetFetchAttempts) + + logDebug(s"Got latest offsets for partition : $partitionOffsets") + partitionOffsets + } } } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 66ec7e0cd084..d65b3cea632c 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -130,7 +130,7 @@ private[kafka010] class KafkaSource( metadataLog.get(0).getOrElse { val offsets = startingOffsets match { case EarliestOffsetRangeLimit => KafkaSourceOffset(kafkaReader.fetchEarliestOffsets()) - case LatestOffsetRangeLimit => KafkaSourceOffset(kafkaReader.fetchLatestOffsets()) + case LatestOffsetRangeLimit => KafkaSourceOffset(kafkaReader.fetchLatestOffsets(None)) case SpecificOffsetRangeLimit(p) => kafkaReader.fetchSpecificOffsets(p, reportDataLoss) } metadataLog.add(0, offsets) @@ -148,7 +148,8 @@ private[kafka010] class KafkaSource( // Make sure initialPartitionOffsets is initialized initialPartitionOffsets - val latest = kafkaReader.fetchLatestOffsets() + val latest = kafkaReader.fetchLatestOffsets( + currentPartitionOffsets.orElse(Some(initialPartitionOffsets))) val offsets = maxOffsetsPerTrigger match { case None => latest diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala 
b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 5ee76990b54f..61cbb3285a4f 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -329,6 +329,54 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { ) } + test("subscribe topic by pattern with topic recreation between batches") { + val topicPrefix = newTopic() + val topic = topicPrefix + "-good" + val topic2 = topicPrefix + "-bad" + testUtils.createTopic(topic, partitions = 1) + testUtils.sendMessages(topic, Array("1", "3")) + testUtils.createTopic(topic2, partitions = 1) + testUtils.sendMessages(topic2, Array("2", "4")) + + val reader = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("kafka.default.api.timeout.ms", "3000") + .option("startingOffsets", "earliest") + .option("subscribePattern", s"$topicPrefix-.*") + + val ds = reader.load() + .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] + .map(kv => kv._2.toInt) + + testStream(ds)( + StartStream(), + AssertOnQuery { q => + q.processAllAvailable() + true + }, + CheckAnswer(1, 2, 3, 4), + // Restart the stream in this test to make the test stable. When recreating a topic when a + // consumer is alive, it may not be able to see the recreated topic even if a fresh consumer + // has seen it. + StopStream, + // Recreate `topic2` and wait until it's available + WithOffsetSync(new TopicPartition(topic2, 0), expectedOffset = 1) { () => + testUtils.deleteTopic(topic2) + testUtils.createTopic(topic2) + testUtils.sendMessages(topic2, Array("6")) + }, + StartStream(), + ExpectFailure[IllegalStateException](e => { + // The offset of `topic2` should be changed from 2 to 1 + assert(e.getMessage.contains("was changed from 2 to 1")) + }) + ) + } + test("ensure that initial offset are written with an extra byte in the beginning (SPARK-19517)") { withTempDir { metadataPath => val topic = "kafka-initial-offset-current" From d6a5f859848bbd237e19075dd26e1547fb3af417 Mon Sep 17 00:00:00 2001 From: wuyi Date: Fri, 21 Dec 2018 13:21:58 -0600 Subject: [PATCH 0178/1072] [SPARK-26269][YARN] Yarnallocator should have same blacklist behaviour with yarn to maxmize use of cluster resource ## What changes were proposed in this pull request? As I mentioned in jira [SPARK-26269](https://issues.apache.org/jira/browse/SPARK-26269), in order to maxmize the use of cluster resource, this pr try to make `YarnAllocator` have the same blacklist behaviour with YARN. ## How was this patch tested? Added. Closes #23223 from Ngone51/dev-YarnAllocator-should-have-same-blacklist-behaviour-with-YARN. 
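To summarize the classification idea this change borrows from YARN (a sketch only, not the actual `YarnAllocator` code; the exit-status set mirrors the `NOT_APP_AND_SYSTEM_FAULT_EXIT_STATUS` constant added in the diff below):

```scala
import org.apache.hadoop.yarn.api.records.ContainerExitStatus

// Exit statuses that YARN does not treat as the node's fault, mirroring the
// set introduced by this patch.
val notNodeFaultStatuses: Set[Int] = Set(
  ContainerExitStatus.KILLED_BY_RESOURCEMANAGER,
  ContainerExitStatus.KILLED_BY_APPMASTER,
  ContainerExitStatus.KILLED_AFTER_APP_COMPLETION,
  ContainerExitStatus.ABORTED,
  ContainerExitStatus.DISKS_FAILED)

// Only failures that are plausibly the node's fault should feed the host
// blacklist; successes, preemption, deliberate kills, and memory-limit kills
// (which are attributed to the application) should not.
def countsTowardsNodeBlacklist(exitStatus: Int): Boolean =
  exitStatus != ContainerExitStatus.SUCCESS &&
    exitStatus != ContainerExitStatus.PREEMPTED &&
    exitStatus != ContainerExitStatus.KILLED_EXCEEDED_VMEM &&
    exitStatus != ContainerExitStatus.KILLED_EXCEEDED_PMEM &&
    !notNodeFaultStatuses.contains(exitStatus)
```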
Lead-authored-by: wuyi Co-authored-by: Ngone51 Signed-off-by: Thomas Graves --- .../spark/deploy/yarn/YarnAllocator.scala | 32 ++++++-- .../yarn/YarnAllocatorBlacklistTracker.scala | 4 +- .../YarnAllocatorBlacklistTrackerSuite.scala | 2 +- .../deploy/yarn/YarnAllocatorSuite.scala | 75 ++++++++++++++++++- 4 files changed, 101 insertions(+), 12 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 54b1ec266113..a3feca5dfd22 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -607,13 +607,23 @@ private[yarn] class YarnAllocator( val message = "Container killed by YARN for exceeding physical memory limits. " + s"$diag Consider boosting ${EXECUTOR_MEMORY_OVERHEAD.key}." (true, message) - case _ => - // all the failures which not covered above, like: - // disk failure, kill by app master or resource manager, ... - allocatorBlacklistTracker.handleResourceAllocationFailure(hostOpt) - (true, "Container marked as failed: " + containerId + onHostStr + - ". Exit status: " + completedContainer.getExitStatus + - ". Diagnostics: " + completedContainer.getDiagnostics) + case other_exit_status => + // SPARK-26269: follow YARN's blacklisting behaviour(see https://github + // .com/apache/hadoop/blob/228156cfd1b474988bc4fedfbf7edddc87db41e3/had + // oop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/ap + // ache/hadoop/yarn/util/Apps.java#L273 for details) + if (NOT_APP_AND_SYSTEM_FAULT_EXIT_STATUS.contains(other_exit_status)) { + (false, s"Container marked as failed: $containerId$onHostStr" + + s". Exit status: ${completedContainer.getExitStatus}" + + s". Diagnostics: ${completedContainer.getDiagnostics}.") + } else { + // completed container from a bad node + allocatorBlacklistTracker.handleResourceAllocationFailure(hostOpt) + (true, s"Container from a bad node: $containerId$onHostStr" + + s". Exit status: ${completedContainer.getExitStatus}" + + s". 
Diagnostics: ${completedContainer.getDiagnostics}.") + } + } if (exitCausedByApp) { @@ -739,4 +749,12 @@ private object YarnAllocator { val MEM_REGEX = "[0-9.]+ [KMG]B" val VMEM_EXCEEDED_EXIT_CODE = -103 val PMEM_EXCEEDED_EXIT_CODE = -104 + + val NOT_APP_AND_SYSTEM_FAULT_EXIT_STATUS = Set( + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, + ContainerExitStatus.KILLED_BY_APPMASTER, + ContainerExitStatus.KILLED_AFTER_APP_COMPLETION, + ContainerExitStatus.ABORTED, + ContainerExitStatus.DISKS_FAILED + ) } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala index ceac7cda5f8b..268976b62950 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala @@ -120,7 +120,9 @@ private[spark] class YarnAllocatorBlacklistTracker( if (removals.nonEmpty) { logInfo(s"removing nodes from YARN application master's blacklist: $removals") } - amClient.updateBlacklist(additions.asJava, removals.asJava) + if (additions.nonEmpty || removals.nonEmpty) { + amClient.updateBlacklist(additions.asJava, removals.asJava) + } currentBlacklistedYarnNodes = nodesToBlacklist } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTrackerSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTrackerSuite.scala index aeac68e6ed33..201910731e93 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTrackerSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTrackerSuite.scala @@ -87,7 +87,7 @@ class YarnAllocatorBlacklistTrackerSuite extends SparkFunSuite with Matchers // expired blacklisted nodes (simulating a resource request) yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host1", "host2")) // no change is communicated to YARN regarding the blacklisting - verify(amClientMock).updateBlacklist(Collections.emptyList(), Collections.emptyList()) + verify(amClientMock, times(0)).updateBlacklist(Collections.emptyList(), Collections.emptyList()) } test("combining scheduler and allocation blacklist") { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala index b61e7df4420e..53a538dc1de2 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.deploy.yarn +import java.util.Collections + import scala.collection.JavaConverters._ import org.apache.hadoop.conf.Configuration @@ -114,13 +116,29 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter clock) } - def createContainer(host: String, resource: Resource = containerResource): Container = { - val containerId = ContainerId.newContainerId(appAttemptId, containerNum) + def createContainer( + host: String, + containerNumber: Int = containerNum, + resource: Resource = containerResource): Container = { + val containerId: ContainerId = ContainerId.newContainerId(appAttemptId, containerNum) containerNum += 1 
val nodeId = NodeId.newInstance(host, 1000) Container.newInstance(containerId, nodeId, "", resource, RM_REQUEST_PRIORITY, null) } + def createContainers(hosts: Seq[String], containerIds: Seq[Int]): Seq[Container] = { + hosts.zip(containerIds).map{case (host, id) => createContainer(host, id)} + } + + def createContainerStatus( + containerId: ContainerId, + exitStatus: Int, + containerState: ContainerState = ContainerState.COMPLETE, + diagnostics: String = "diagnostics"): ContainerStatus = { + ContainerStatus.newInstance(containerId, containerState, diagnostics, exitStatus) + } + + test("single container allocated") { // request a single container and receive it val handler = createAllocator(1) @@ -148,7 +166,7 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter Map(YARN_EXECUTOR_RESOURCE_TYPES_PREFIX + "gpu" -> "2G")) handler.updateResourceRequests() - val container = createContainer("host1", handler.resource) + val container = createContainer("host1", resource = handler.resource) handler.handleAllocatedContainers(Array(container)) // get amount of memory and vcores from resource, so effectively skipping their validation @@ -417,4 +435,55 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter clock.advance(50 * 1000L) handler.getNumExecutorsFailed should be (0) } + + test("SPARK-26269: YarnAllocator should have same blacklist behaviour with YARN") { + val rmClientSpy = spy(rmClient) + val maxExecutors = 11 + + val handler = createAllocator( + maxExecutors, + rmClientSpy, + Map( + "spark.yarn.blacklist.executor.launch.blacklisting.enabled" -> "true", + "spark.blacklist.application.maxFailedExecutorsPerNode" -> "0")) + handler.updateResourceRequests() + + val hosts = (0 until maxExecutors).map(i => s"host$i") + val ids = 0 to maxExecutors + val containers = createContainers(hosts, ids) + + val nonBlacklistedStatuses = Seq( + ContainerExitStatus.SUCCESS, + ContainerExitStatus.PREEMPTED, + ContainerExitStatus.KILLED_EXCEEDED_VMEM, + ContainerExitStatus.KILLED_EXCEEDED_PMEM, + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, + ContainerExitStatus.KILLED_BY_APPMASTER, + ContainerExitStatus.KILLED_AFTER_APP_COMPLETION, + ContainerExitStatus.ABORTED, + ContainerExitStatus.DISKS_FAILED) + + val nonBlacklistedContainerStatuses = nonBlacklistedStatuses.zipWithIndex.map { + case (exitStatus, idx) => createContainerStatus(containers(idx).getId, exitStatus) + } + + val BLACKLISTED_EXIT_CODE = 1 + val blacklistedStatuses = Seq(ContainerExitStatus.INVALID, BLACKLISTED_EXIT_CODE) + + val blacklistedContainerStatuses = blacklistedStatuses.zip(9 until maxExecutors).map { + case (exitStatus, idx) => createContainerStatus(containers(idx).getId, exitStatus) + } + + handler.handleAllocatedContainers(containers.slice(0, 9)) + handler.processCompletedContainers(nonBlacklistedContainerStatuses) + verify(rmClientSpy, never()) + .updateBlacklist(hosts.slice(0, 9).asJava, Collections.emptyList()) + + handler.handleAllocatedContainers(containers.slice(9, 11)) + handler.processCompletedContainers(blacklistedContainerStatuses) + verify(rmClientSpy) + .updateBlacklist(hosts.slice(9, 10).asJava, Collections.emptyList()) + verify(rmClientSpy) + .updateBlacklist(hosts.slice(10, 11).asJava, Collections.emptyList()) + } } From 8dd29fe36b781d115213b1d6a8446ad04e9239bb Mon Sep 17 00:00:00 2001 From: pgandhi Date: Fri, 21 Dec 2018 11:28:22 -0800 Subject: [PATCH 0179/1072] [SPARK-25642][YARN] Adding two new metrics to record the number of registered connections as well as 
the number of active connections to YARN Shuffle Service Recently, the ability to expose the metrics for YARN Shuffle Service was added as part of [SPARK-18364](https://github.com/apache/spark/pull/22485). We need to add some metrics to be able to determine the number of active connections as well as open connections to the external shuffle service to benchmark network and connection issues on large cluster environments. Added two more shuffle server metrics for Spark Yarn shuffle service: numRegisteredConnections which indicate the number of registered connections to the shuffle service and numActiveConnections which indicate the number of active connections to the shuffle service at any given point in time. If these metrics are outputted to a file, we get something like this: 1533674653489 default.shuffleService: Hostname=server1.abc.com, openBlockRequestLatencyMillis_count=729, openBlockRequestLatencyMillis_rate15=0.7110833548897356, openBlockRequestLatencyMillis_rate5=1.657808981793011, openBlockRequestLatencyMillis_rate1=2.2404486061620474, openBlockRequestLatencyMillis_rateMean=0.9242558551196706, numRegisteredConnections=35, blockTransferRateBytes_count=2635880512, blockTransferRateBytes_rate15=2578547.6094160094, blockTransferRateBytes_rate5=6048721.726302424, blockTransferRateBytes_rate1=8548922.518223226, blockTransferRateBytes_rateMean=3341878.633637769, registeredExecutorsSize=5, registerExecutorRequestLatencyMillis_count=5, registerExecutorRequestLatencyMillis_rate15=0.0027973949328659836, registerExecutorRequestLatencyMillis_rate5=0.0021278007987206426, registerExecutorRequestLatencyMillis_rate1=2.8270296777387467E-6, registerExecutorRequestLatencyMillis_rateMean=0.006339206380043053, numActiveConnections=35 Closes #22498 from pgandhi999/SPARK-18364. 
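As a rough sketch of how connection counters like these can be exposed through the Dropwizard metrics library used by the shuffle service (the class name and wiring are illustrative, not the exact shuffle-service code):

```scala
import java.util.{HashMap => JHashMap, Map => JMap}

import com.codahale.metrics.{Counter, Metric, MetricSet}

// A small metric set exposing the two counters described above.
class ConnectionMetrics extends MetricSet {
  // Connections registered with the transport layer.
  val registeredConnections = new Counter()
  // Connections currently open: incremented on connect, decremented on disconnect.
  val activeConnections = new Counter()

  override def getMetrics: JMap[String, Metric] = {
    val metrics = new JHashMap[String, Metric]()
    metrics.put("numRegisteredConnections", registeredConnections)
    metrics.put("numActiveConnections", activeConnections)
    metrics
  }
}

// Illustrative call sites: a transport handler would invoke these from its
// channel registered / active / inactive callbacks.
def onChannelRegistered(m: ConnectionMetrics): Unit = m.registeredConnections.inc()
def onChannelActive(m: ConnectionMetrics): Unit = m.activeConnections.inc()
def onChannelInactive(m: ConnectionMetrics): Unit = m.activeConnections.dec()
```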
Authored-by: pgandhi Signed-off-by: Marcelo Vanzin --- .../spark/network/TransportContext.java | 9 ++++++- .../server/TransportChannelHandler.java | 18 +++++++++++++- .../spark/network/server/TransportServer.java | 5 ++++ .../shuffle/ExternalShuffleBlockHandler.java | 24 +++++++++++++++++-- .../network/yarn/YarnShuffleService.java | 21 +++++++++------- .../yarn/YarnShuffleServiceMetrics.java | 5 ++++ .../spark/deploy/ExternalShuffleService.scala | 2 ++ .../yarn/YarnShuffleServiceMetricsSuite.scala | 3 ++- 8 files changed, 73 insertions(+), 14 deletions(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java index 480b52652de5..1a3f3f2a6f24 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java +++ b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.List; +import com.codahale.metrics.Counter; import io.netty.channel.Channel; import io.netty.channel.ChannelPipeline; import io.netty.channel.EventLoopGroup; @@ -66,6 +67,8 @@ public class TransportContext { private final RpcHandler rpcHandler; private final boolean closeIdleConnections; private final boolean isClientOnly; + // Number of registered connections to the shuffle service + private Counter registeredConnections = new Counter(); /** * Force to create MessageEncoder and MessageDecoder so that we can make sure they will be created @@ -221,7 +224,7 @@ private TransportChannelHandler createChannelHandler(Channel channel, RpcHandler TransportRequestHandler requestHandler = new TransportRequestHandler(channel, client, rpcHandler, conf.maxChunksBeingTransferred()); return new TransportChannelHandler(client, responseHandler, requestHandler, - conf.connectionTimeoutMs(), closeIdleConnections); + conf.connectionTimeoutMs(), closeIdleConnections, this); } /** @@ -234,4 +237,8 @@ private ChunkFetchRequestHandler createChunkFetchHandler(TransportChannelHandler } public TransportConf getConf() { return conf; } + + public Counter getRegisteredConnections() { + return registeredConnections; + } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java index c824a7b0d474..ca81099c4d5c 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java @@ -21,6 +21,7 @@ import io.netty.channel.SimpleChannelInboundHandler; import io.netty.handler.timeout.IdleState; import io.netty.handler.timeout.IdleStateEvent; +import org.apache.spark.network.TransportContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,18 +58,21 @@ public class TransportChannelHandler extends SimpleChannelInboundHandler allMetrics; // Time latency for open block request in ms private final Timer openBlockRequestLatencyMillis = new Timer(); @@ -181,14 +183,20 @@ private class ShuffleMetrics implements MetricSet { private final Timer registerExecutorRequestLatencyMillis = new Timer(); // Block transfer rate in byte per second private final Meter blockTransferRateBytes = new Meter(); + // Number of active connections to the shuffle service + private Counter activeConnections = new Counter(); + // Number of 
registered connections to the shuffle service + private Counter registeredConnections = new Counter(); - private ShuffleMetrics() { + public ShuffleMetrics() { allMetrics = new HashMap<>(); allMetrics.put("openBlockRequestLatencyMillis", openBlockRequestLatencyMillis); allMetrics.put("registerExecutorRequestLatencyMillis", registerExecutorRequestLatencyMillis); allMetrics.put("blockTransferRateBytes", blockTransferRateBytes); allMetrics.put("registeredExecutorsSize", (Gauge) () -> blockManager.getRegisteredExecutorsSize()); + allMetrics.put("numActiveConnections", activeConnections); + allMetrics.put("numRegisteredConnections", registeredConnections); } @Override @@ -244,4 +252,16 @@ public ManagedBuffer next() { } } + @Override + public void channelActive(TransportClient client) { + metrics.activeConnections.inc(); + super.channelActive(client); + } + + @Override + public void channelInactive(TransportClient client) { + metrics.activeConnections.dec(); + super.channelInactive(client); + } + } diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java index 72ae1a129523..7e8d3b2bc3ba 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java @@ -170,15 +170,6 @@ protected void serviceInit(Configuration conf) throws Exception { TransportConf transportConf = new TransportConf("shuffle", new HadoopConfigProvider(conf)); blockHandler = new ExternalShuffleBlockHandler(transportConf, registeredExecutorFile); - // register metrics on the block handler into the Node Manager's metrics system. - YarnShuffleServiceMetrics serviceMetrics = - new YarnShuffleServiceMetrics(blockHandler.getAllMetrics()); - - MetricsSystemImpl metricsSystem = (MetricsSystemImpl) DefaultMetricsSystem.instance(); - metricsSystem.register( - "sparkShuffleService", "Metrics on the Spark Shuffle Service", serviceMetrics); - logger.info("Registered metrics with Hadoop's DefaultMetricsSystem"); - // If authentication is enabled, set up the shuffle server to use a // special RPC handler that filters out unauthenticated fetch requests List bootstraps = Lists.newArrayList(); @@ -199,6 +190,18 @@ protected void serviceInit(Configuration conf) throws Exception { port = shuffleServer.getPort(); boundPort = port; String authEnabledString = authEnabled ? "enabled" : "not enabled"; + + // register metrics on the block handler into the Node Manager's metrics system. + blockHandler.getAllMetrics().getMetrics().put("numRegisteredConnections", + shuffleServer.getRegisteredConnections()); + YarnShuffleServiceMetrics serviceMetrics = + new YarnShuffleServiceMetrics(blockHandler.getAllMetrics()); + + MetricsSystemImpl metricsSystem = (MetricsSystemImpl) DefaultMetricsSystem.instance(); + metricsSystem.register( + "sparkShuffleService", "Metrics on the Spark Shuffle Service", serviceMetrics); + logger.info("Registered metrics with Hadoop's DefaultMetricsSystem"); + logger.info("Started YARN shuffle service for Spark on port {}. " + "Authentication is {}. 
Registered executor file is {}", port, authEnabledString, registeredExecutorFile); diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleServiceMetrics.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleServiceMetrics.java index 3e4d479b862b..501237407e9b 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleServiceMetrics.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleServiceMetrics.java @@ -107,6 +107,11 @@ public static void collectMetric( throw new IllegalStateException( "Not supported class type of metric[" + name + "] for value " + gaugeValue); } + } else if (metric instanceof Counter) { + Counter c = (Counter) metric; + long counterValue = c.getCount(); + metricsRecordBuilder.addGauge(new ShuffleServiceMetricsInfo(name, "Number of " + + "connections to shuffle service " + name), counterValue); } } diff --git a/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala b/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala index f6b3c37f0fe7..03e3abb3ce56 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala @@ -84,6 +84,8 @@ class ExternalShuffleService(sparkConf: SparkConf, securityManager: SecurityMana server = transportContext.createServer(port, bootstraps.asJava) shuffleServiceSource.registerMetricSet(server.getAllMetrics) + blockHandler.getAllMetrics.getMetrics.put("numRegisteredConnections", + server.getRegisteredConnections) shuffleServiceSource.registerMetricSet(blockHandler.getAllMetrics) masterMetricsSystem.registerSource(shuffleServiceSource) masterMetricsSystem.start() diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala index 40b92282a3b8..952fd0b70bb7 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala @@ -38,7 +38,8 @@ class YarnShuffleServiceMetricsSuite extends SparkFunSuite with Matchers { test("metrics named as expected") { val allMetrics = Set( "openBlockRequestLatencyMillis", "registerExecutorRequestLatencyMillis", - "blockTransferRateBytes", "registeredExecutorsSize") + "blockTransferRateBytes", "registeredExecutorsSize", "numActiveConnections", + "numRegisteredConnections") metrics.getMetrics.keySet().asScala should be (allMetrics) } From bba506f8f454c7a8fa82e93a1728e02428fe0d35 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 22 Dec 2018 10:16:27 +0800 Subject: [PATCH 0180/1072] [SPARK-26216][SQL][FOLLOWUP] use abstract class instead of trait for UserDefinedFunction ## What changes were proposed in this pull request? A followup of https://github.com/apache/spark/pull/23178 , to keep binary compability by using abstract class. ## How was this patch tested? Manual test. 
I created a simple app with Spark 2.4 ``` object TryUDF { def main(args: Array[String]): Unit = { val spark = SparkSession.builder().appName("test").master("local[*]").getOrCreate() import spark.implicits._ val f1 = udf((i: Int) => i + 1) println(f1.deterministic) spark.range(10).select(f1.asNonNullable().apply($"id")).show() spark.stop() } } ``` When I run it with current master, it fails with ``` java.lang.IncompatibleClassChangeError: Found interface org.apache.spark.sql.expressions.UserDefinedFunction, but class was expected ``` When I run it with this PR, it works Closes #23351 from cloud-fan/minor. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- docs/sql-migration-guide-upgrade.md | 2 -- project/MimaExcludes.scala | 28 ++++++++++++++++++- .../sql/expressions/UserDefinedFunction.scala | 2 +- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 115fc6516fb4..1bd3b5ad0e1a 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -33,8 +33,6 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, the `SET` command works without any warnings even if the specified key is for `SparkConf` entries and it has no effect because the command does not update `SparkConf`, but the behavior might confuse users. Since 3.0, the command fails if a `SparkConf` key is used. You can disable such a check by setting `spark.sql.legacy.setCommandRejectsSparkCoreConfs` to `false`. - - Spark applications which are built with Spark version 2.4 and prior, and call methods of `UserDefinedFunction`, need to be re-compiled with Spark 3.0, as they are not binary compatible with Spark 3.0. - - Since Spark 3.0, CSV/JSON datasources use java.time API for parsing and generating CSV/JSON content. In Spark version 2.4 and earlier, java.text.SimpleDateFormat is used for the same purpuse with fallbacks to the parsing mechanisms of Spark 2.0 and 1.x. For example, `2018-12-08 10:39:21.123` with the pattern `yyyy-MM-dd'T'HH:mm:ss.SSS` cannot be parsed since Spark 3.0 because the timestamp does not match to the pattern but it can be parsed by earlier Spark versions due to a fallback to `Timestamp.valueOf`. To parse the same timestamp since Spark 3.0, the pattern should be `yyyy-MM-dd HH:mm:ss.SSS`. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. - In Spark version 2.4 and earlier, CSV datasource converts a malformed CSV string to a row with all `null`s in the PERMISSIVE mode. Since Spark 3.0, the returned row can contain non-`null` fields if some of CSV column values were parsed and converted to desired types successfully. 
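To make the PERMISSIVE-mode change described in the migration note above concrete, here is a small, hedged example (the input path, schema, and corrupt-record column are made up for illustration):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{DoubleType, IntegerType, StringType, StructType}

val spark = SparkSession.builder().appName("csv-permissive-example").getOrCreate()

// Hypothetical schema; the extra column collects the raw text of malformed records.
val schema = new StructType()
  .add("id", IntegerType)
  .add("price", DoubleType)
  .add("_corrupt_record", StringType)

// In PERMISSIVE mode, a malformed line such as "3,not-a-number" now yields a row
// where the fields that did parse (id = 3) are kept and the rest are null; in
// Spark 2.4 and earlier the whole row would have been all nulls.
val df = spark.read
  .schema(schema)
  .option("mode", "PERMISSIVE")
  .option("columnNameOfCorruptRecord", "_corrupt_record")
  .csv("/tmp/example-input.csv") // illustrative path

df.show(truncate = false)
```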
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 7bb70a29195d..89fc53ce3972 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -241,7 +241,33 @@ object MimaExcludes { // [SPARK-26216][SQL] Do not use case class as public API (UserDefinedFunction) ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.expressions.UserDefinedFunction$"), - ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.sql.expressions.UserDefinedFunction") + ProblemFilters.exclude[AbstractClassProblem]("org.apache.spark.sql.expressions.UserDefinedFunction"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.inputTypes"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.nullableTypes_="), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.dataType"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.f"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.this"), + ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.asNonNullable"), + ProblemFilters.exclude[ReversedAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.asNonNullable"), + ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.nullable"), + ProblemFilters.exclude[ReversedAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.nullable"), + ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.asNondeterministic"), + ProblemFilters.exclude[ReversedAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.asNondeterministic"), + ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.deterministic"), + ProblemFilters.exclude[ReversedAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.deterministic"), + ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.apply"), + ProblemFilters.exclude[ReversedAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.apply"), + ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.withName"), + ProblemFilters.exclude[ReversedAbstractMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.withName"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.productElement"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.productArity"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.copy$default$2"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.canEqual"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.copy"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.copy$default$1"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.productIterator"), 
+ ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.productPrefix"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.expressions.UserDefinedFunction.copy$default$3") ) // Exclude rules for 2.4.x diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index f88e0e0f299d..901472d8e036 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.types.DataType * @since 1.3.0 */ @Stable -sealed trait UserDefinedFunction { +sealed abstract class UserDefinedFunction { /** * Returns true when the UDF can return a nullable value. From 81addaa6b7b6f16e477f8dbb26a5d5e9541131b0 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 22 Dec 2018 00:41:21 -0800 Subject: [PATCH 0181/1072] [SPARK-26427][BUILD] Upgrade Apache ORC to 1.5.4 ## What changes were proposed in this pull request? This PR aims to update Apache ORC dependency to the latest version 1.5.4 released at Dec. 20. ([Release Notes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12318320&version=12344187])) ``` [ORC-237] OrcFile.mergeFiles Specified block size is less than configured minimum value [ORC-409] Changes for extending MemoryManagerImpl [ORC-410] Fix a locale-dependent test in TestCsvReader [ORC-416] Avoid opening data reader when there is no stripe [ORC-417] Use dynamic Apache Maven mirror link [ORC-419] Ensure to call `close` at RecordReaderImpl constructor exception [ORC-432] openjdk 8 has a bug that prevents surefire from working [ORC-435] Ability to read stripes that are greater than 2GB [ORC-437] Make acid schema checks case insensitive [ORC-411] Update build to work with Java 10. [ORC-418] Fix broken docker build script ``` ## How was this patch tested? Build and pass Jenkins. Closes #23364 from dongjoon-hyun/SPARK-26427. 
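As a quick way to exercise the upgraded ORC artifacts, a local round trip through the ORC data source covers the reader and writer paths that these jars back. This is an illustrative sketch and not part of the patch; the local master and the scratch output path are assumptions.

```scala
import org.apache.spark.sql.SparkSession

// Minimal smoke test against the upgraded orc-core/orc-mapreduce/orc-shims jars.
// The output path below is an assumed scratch location.
val spark = SparkSession.builder().appName("orc-1.5.4-smoke-test").master("local[*]").getOrCreate()
spark.range(10).toDF("id").write.mode("overwrite").orc("/tmp/orc-1.5.4-smoke-test")
spark.read.orc("/tmp/orc-1.5.4-smoke-test").show()
spark.stop()
```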
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7 | 6 +++--- dev/deps/spark-deps-hadoop-3.1 | 6 +++--- pom.xml | 6 +++++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index 71423af0789c..1af29fcaff2a 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -155,9 +155,9 @@ objenesis-2.5.1.jar okhttp-3.8.1.jar okio-1.13.0.jar opencsv-2.3.jar -orc-core-1.5.3-nohive.jar -orc-mapreduce-1.5.3-nohive.jar -orc-shims-1.5.3.jar +orc-core-1.5.4-nohive.jar +orc-mapreduce-1.5.4-nohive.jar +orc-shims-1.5.4.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1 index 93eafef04533..05f180b17a58 100644 --- a/dev/deps/spark-deps-hadoop-3.1 +++ b/dev/deps/spark-deps-hadoop-3.1 @@ -172,9 +172,9 @@ okhttp-2.7.5.jar okhttp-3.8.1.jar okio-1.13.0.jar opencsv-2.3.jar -orc-core-1.5.3-nohive.jar -orc-mapreduce-1.5.3-nohive.jar -orc-shims-1.5.3.jar +orc-core-1.5.4-nohive.jar +orc-mapreduce-1.5.4-nohive.jar +orc-shims-1.5.4.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index 310d7de95512..de9421419edc 100644 --- a/pom.xml +++ b/pom.xml @@ -132,7 +132,7 @@ 2.1.0 10.12.1.1 1.10.0 - 1.5.3 + 1.5.4 nohive 1.6.0 9.4.12.v20180830 @@ -1740,6 +1740,10 @@ ${orc.classifier} ${orc.deps.scope} + + javax.xml.bind + jaxb-api + org.apache.hadoop hadoop-common From ceff0c8450a4f2e31ec52dfc4d101f67c67853c5 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 22 Dec 2018 00:43:59 -0800 Subject: [PATCH 0182/1072] [SPARK-26428][SS][TEST] Minimize deprecated `ProcessingTime` usage ## What changes were proposed in this pull request? Use of `ProcessingTime` class was deprecated in favor of `Trigger.ProcessingTime` in Spark 2.2. And, [SPARK-21464](https://issues.apache.org/jira/browse/SPARK-21464) minimized it at 2.2.1. Recently, it grows again in test suites. This PR aims to clean up newly introduced deprecation warnings for Spark 3.0. ## How was this patch tested? Pass the Jenkins with existing tests and manually check the warnings. Closes #23367 from dongjoon-hyun/SPARK-26428. 
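The replacement is mechanical: each use of the deprecated `ProcessingTime` class becomes a call to the `Trigger.ProcessingTime` factory, as the test-suite diff below shows. A hedged sketch of the same pattern in user code follows; the rate source, console sink and session setup are placeholders, not part of this patch.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.Trigger

val spark = SparkSession.builder().appName("trigger-processing-time").master("local[*]").getOrCreate()
val rates = spark.readStream.format("rate").load()

// Deprecated since Spark 2.2: .trigger(ProcessingTime("10 seconds"))
// Preferred form, used throughout the suites touched by this patch:
val query = rates.writeStream
  .format("console")
  .trigger(Trigger.ProcessingTime("10 seconds"))
  .start()

query.awaitTermination(30000)  // run briefly for demonstration, then stop
query.stop()
```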
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../kafka010/KafkaMicroBatchSourceSuite.scala | 16 ++++++++-------- .../sql/streaming/FileStreamSourceSuite.scala | 2 +- .../apache/spark/sql/streaming/StreamSuite.scala | 4 ++-- .../streaming/StreamingQueryListenerSuite.scala | 6 +++--- .../sql/streaming/StreamingQuerySuite.scala | 4 ++-- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 61cbb3285a4f..d4eb52654005 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.functions.{count, window} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.kafka010.KafkaSourceProvider._ import org.apache.spark.sql.sources.v2.DataSourceOptions -import org.apache.spark.sql.streaming.{ProcessingTime, StreamTest} +import org.apache.spark.sql.streaming.{StreamTest, Trigger} import org.apache.spark.sql.streaming.util.StreamManualClock import org.apache.spark.sql.test.SharedSQLContext @@ -236,7 +236,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { } testStream(mapped)( - StartStream(ProcessingTime(100), clock), + StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed, // 1 from smallest, 1 from middle, 8 from biggest CheckAnswer(1, 10, 100, 101, 102, 103, 104, 105, 106, 107), @@ -247,7 +247,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { 11, 108, 109, 110, 111, 112, 113, 114, 115, 116 ), StopStream, - StartStream(ProcessingTime(100), clock), + StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed, // smallest now empty, 1 more from middle, 9 more from biggest CheckAnswer(1, 10, 100, 101, 102, 103, 104, 105, 106, 107, @@ -282,7 +282,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { val mapped = kafka.map(kv => kv._2.toInt + 1) testStream(mapped)( - StartStream(trigger = ProcessingTime(1)), + StartStream(trigger = Trigger.ProcessingTime(1)), makeSureGetOffsetCalled, AddKafkaData(Set(topic), 1, 2, 3), CheckAnswer(2, 3, 4), @@ -605,7 +605,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { } testStream(kafka)( - StartStream(ProcessingTime(100), clock), + StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed, // 5 from smaller topic, 5 from bigger one CheckLastBatch((0 to 4) ++ (100 to 104): _*), @@ -618,7 +618,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { // smaller topic empty, 5 from bigger one CheckLastBatch(110 to 114: _*), StopStream, - StartStream(ProcessingTime(100), clock), + StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed, // smallest now empty, 5 from bigger one CheckLastBatch(115 to 119: _*), @@ -727,7 +727,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { // The message values are the same as their offsets to make the test easy to follow testUtils.withTranscationalProducer { producer => testStream(mapped)( - StartStream(ProcessingTime(100), clock), + StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed, CheckAnswer(), 
WithOffsetSync(topicPartition, expectedOffset = 5) { () => @@ -850,7 +850,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { // The message values are the same as their offsets to make the test easy to follow testUtils.withTranscationalProducer { producer => testStream(mapped)( - StartStream(ProcessingTime(100), clock), + StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed, CheckNewAnswer(), WithOffsetSync(topicPartition, expectedOffset = 5) { () => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index d4bd9c7987f2..de664cafed3b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -1360,7 +1360,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest { options = srcOptions) val clock = new StreamManualClock() testStream(fileStream)( - StartStream(trigger = ProcessingTime(10), triggerClock = clock), + StartStream(trigger = Trigger.ProcessingTime(10), triggerClock = clock), AssertOnQuery { _ => // Block until the first batch finishes. eventually(timeout(streamingTimeout)) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index f55ddb5419d2..55fdcee83f11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -312,7 +312,7 @@ class StreamSuite extends StreamTest { val inputData = MemoryStream[Int] testStream(inputData.toDS())( - StartStream(ProcessingTime("10 seconds"), new StreamManualClock), + StartStream(Trigger.ProcessingTime("10 seconds"), new StreamManualClock), /* -- batch 0 ----------------------- */ // Add some data in batch 0 @@ -353,7 +353,7 @@ class StreamSuite extends StreamTest { /* Stop then restart the Stream */ StopStream, - StartStream(ProcessingTime("10 seconds"), new StreamManualClock(60 * 1000)), + StartStream(Trigger.ProcessingTime("10 seconds"), new StreamManualClock(60 * 1000)), /* -- batch 1 no rerun ----------------- */ // batch 1 would not re-run because the latest batch id logged in commit log is 1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala index fe77a1b4469c..d00f2e3bf4d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala @@ -82,7 +82,7 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { testStream(df, OutputMode.Append)( // Start event generated when query started - StartStream(ProcessingTime(100), triggerClock = clock), + StartStream(Trigger.ProcessingTime(100), triggerClock = clock), AssertOnQuery { query => assert(listener.startEvent !== null) assert(listener.startEvent.id === query.id) @@ -124,7 +124,7 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { }, // Termination event generated with exception message when stopped with error - StartStream(ProcessingTime(100), triggerClock = clock), + StartStream(Trigger.ProcessingTime(100), triggerClock = clock), 
AssertStreamExecThreadToWaitForClock(), AddData(inputData, 0), AdvanceManualClock(100), // process bad data @@ -306,7 +306,7 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { } val clock = new StreamManualClock() val actions = mutable.ArrayBuffer[StreamAction]() - actions += StartStream(trigger = ProcessingTime(10), triggerClock = clock) + actions += StartStream(trigger = Trigger.ProcessingTime(10), triggerClock = clock) for (_ <- 1 to 100) { actions += AdvanceManualClock(10) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index c170641372d6..29b816486a1f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -257,7 +257,7 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi var lastProgressBeforeStop: StreamingQueryProgress = null testStream(mapped, OutputMode.Complete)( - StartStream(ProcessingTime(1000), triggerClock = clock), + StartStream(Trigger.ProcessingTime(1000), triggerClock = clock), AssertStreamExecThreadIsWaitingForTime(1000), AssertOnQuery(_.status.isDataAvailable === false), AssertOnQuery(_.status.isTriggerActive === false), @@ -370,7 +370,7 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi AssertOnQuery(_.status.message === "Stopped"), // Test status and progress after query terminated with error - StartStream(ProcessingTime(1000), triggerClock = clock), + StartStream(Trigger.ProcessingTime(1000), triggerClock = clock), AdvanceManualClock(1000), // ensure initial trigger completes before AddData AddData(inputData, 0), AdvanceManualClock(1000), // allow another trigger From c7bfb4cf832d5b20527df6e19855b6a7436988a9 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 22 Dec 2018 00:46:36 -0800 Subject: [PATCH 0183/1072] [SPARK-26430][BUILD][TEST-MAVEN] Upgrade Surefire plugin to 3.0.0-M2 ## What changes were proposed in this pull request? This PR aims to upgrade Maven Surefile plugin for JDK11 support. 3.0.0-M2 is [released Dec. 9th.](https://issues.apache.org/jira/projects/SUREFIRE/versions/12344396) ``` [SUREFIRE-1568] Versions 2.21 and higher doesn't work with junit-platform for Java 9 module [SUREFIRE-1605] NoClassDefFoundError (RunNotifier) with JDK 11 [SUREFIRE-1600] Surefire Project using surefire:2.12.4 is not fully able to work with JDK 10+ on internal build system. Therefore surefire-shadefire should go with Surefire:3.0.0-M2. 
[SUREFIRE-1593] 3.0.0-M1 produces invalid code sources on Windows [SUREFIRE-1602] Surefire fails loading class ForkedBooter when using a sub-directory pom file and a local maven repo [SUREFIRE-1606] maven-shared-utils must not be on provider's classpath [SUREFIRE-1531] Option to switch-off Java 9 modules [SUREFIRE-1590] Deploy multiple versions of Report XSD [SUREFIRE-1591] Java 1.7 feature Diamonds replaced Generics [SUREFIRE-1594] Java 1.7 feature try-catch - multiple exceptions in one catch [SUREFIRE-1595] Java 1.7 feature System.lineSeparator() [SUREFIRE-1597] ModularClasspathForkConfiguration with debug logs (args file and its path on file system) [SUREFIRE-1596] Unnecessary check JAVA_RECENT == JAVA_1_7 in unit tests [SUREFIRE-1598] Fixed typo in assertion statement in integration test Surefire855AllowFailsafeUseArtifactFileIT [SUREFIRE-1607] Roadmap on Project Site ``` ## How was this patch tested? Pass the Jenkins. Closes #23370 from dongjoon-hyun/SPARK-26430. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index de9421419edc..321de209a56a 100644 --- a/pom.xml +++ b/pom.xml @@ -2103,7 +2103,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.0.0-M1 + 3.0.0-M2 From 0a02d5c36fc5035abcfb930e1a229d65c6cf683f Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Sat, 22 Dec 2018 09:03:02 -0600 Subject: [PATCH 0184/1072] =?UTF-8?q?[SPARK-26285][CORE]=20accumulator=20m?= =?UTF-8?q?etrics=20sources=20for=20LongAccumulator=20and=20Doub=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …leAccumulator ## What changes were proposed in this pull request? This PR implements metric sources for LongAccumulator and DoubleAccumulator, such that a user can register these accumulators easily and have their values be reported by the driver's metric namespace. ## How was this patch tested? Unit tests, and manual tests. Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23242 from abellina/SPARK-26285_accumulator_source. Lead-authored-by: Alessandro Bellina Co-authored-by: Alessandro Bellina Co-authored-by: Alessandro Bellina Signed-off-by: Thomas Graves --- .../metrics/source/AccumulatorSource.scala | 89 ++++++++++++++++++ .../source/AccumulatorSourceSuite.scala | 91 +++++++++++++++++++ .../examples/AccumulatorMetricsTest.scala | 77 ++++++++++++++++ 3 files changed, 257 insertions(+) create mode 100644 core/src/main/scala/org/apache/spark/metrics/source/AccumulatorSource.scala create mode 100644 core/src/test/scala/org/apache/spark/metrics/source/AccumulatorSourceSuite.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/AccumulatorMetricsTest.scala diff --git a/core/src/main/scala/org/apache/spark/metrics/source/AccumulatorSource.scala b/core/src/main/scala/org/apache/spark/metrics/source/AccumulatorSource.scala new file mode 100644 index 000000000000..45a4d224d45f --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/source/AccumulatorSource.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.source + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.spark.SparkContext +import org.apache.spark.annotation.Experimental +import org.apache.spark.util.{AccumulatorV2, DoubleAccumulator, LongAccumulator} + +/** + * AccumulatorSource is a Spark metric Source that reports the current value + * of the accumulator as a gauge. + * + * It is restricted to the LongAccumulator and the DoubleAccumulator, as those + * are the current built-in numerical accumulators with Spark, and excludes + * the CollectionAccumulator, as that is a List of values (hard to report, + * to a metrics system) + */ +private[spark] class AccumulatorSource extends Source { + private val registry = new MetricRegistry + protected def register[T](accumulators: Map[String, AccumulatorV2[_, T]]): Unit = { + accumulators.foreach { + case (name, accumulator) => + val gauge = new Gauge[T] { + override def getValue: T = accumulator.value + } + registry.register(MetricRegistry.name(name), gauge) + } + } + + override def sourceName: String = "AccumulatorSource" + override def metricRegistry: MetricRegistry = registry +} + +@Experimental +class LongAccumulatorSource extends AccumulatorSource + +@Experimental +class DoubleAccumulatorSource extends AccumulatorSource + +/** + * :: Experimental :: + * Metrics source specifically for LongAccumulators. Accumulators + * are only valid on the driver side, so these metrics are reported + * only by the driver. + * Register LongAccumulators using: + * LongAccumulatorSource.register(sc, {"name" -> longAccumulator}) + */ +@Experimental +object LongAccumulatorSource { + def register(sc: SparkContext, accumulators: Map[String, LongAccumulator]): Unit = { + val source = new LongAccumulatorSource + source.register(accumulators) + sc.env.metricsSystem.registerSource(source) + } +} + +/** + * :: Experimental :: + * Metrics source specifically for DoubleAccumulators. Accumulators + * are only valid on the driver side, so these metrics are reported + * only by the driver. + * Register DoubleAccumulators using: + * DoubleAccumulatorSource.register(sc, {"name" -> doubleAccumulator}) + */ +@Experimental +object DoubleAccumulatorSource { + def register(sc: SparkContext, accumulators: Map[String, DoubleAccumulator]): Unit = { + val source = new DoubleAccumulatorSource + source.register(accumulators) + sc.env.metricsSystem.registerSource(source) + } +} diff --git a/core/src/test/scala/org/apache/spark/metrics/source/AccumulatorSourceSuite.scala b/core/src/test/scala/org/apache/spark/metrics/source/AccumulatorSourceSuite.scala new file mode 100644 index 000000000000..6a6c07cb068c --- /dev/null +++ b/core/src/test/scala/org/apache/spark/metrics/source/AccumulatorSourceSuite.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.source + +import com.codahale.metrics.MetricRegistry +import org.mockito.ArgumentCaptor +import org.mockito.Mockito.{mock, never, spy, times, verify, when} + +import org.apache.spark.{SparkContext, SparkEnv, SparkFunSuite} +import org.apache.spark.metrics.MetricsSystem +import org.apache.spark.util.{DoubleAccumulator, LongAccumulator} + +class AccumulatorSourceSuite extends SparkFunSuite { + test("that that accumulators register against the metric system's register") { + val acc1 = new LongAccumulator() + val acc2 = new LongAccumulator() + val mockContext = mock(classOf[SparkContext]) + val mockEnvironment = mock(classOf[SparkEnv]) + val mockMetricSystem = mock(classOf[MetricsSystem]) + when(mockEnvironment.metricsSystem) thenReturn (mockMetricSystem) + when(mockContext.env) thenReturn (mockEnvironment) + val accs = Map("my-accumulator-1" -> acc1, + "my-accumulator-2" -> acc2) + LongAccumulatorSource.register(mockContext, accs) + val captor = new ArgumentCaptor[AccumulatorSource]() + verify(mockMetricSystem, times(1)).registerSource(captor.capture()) + val source = captor.getValue() + val gauges = source.metricRegistry.getGauges() + assert (gauges.size == 2) + assert (gauges.firstKey == "my-accumulator-1") + assert (gauges.lastKey == "my-accumulator-2") + } + + test("the accumulators value property is checked when the gauge's value is requested") { + val acc1 = new LongAccumulator() + acc1.add(123) + val acc2 = new LongAccumulator() + acc2.add(456) + val mockContext = mock(classOf[SparkContext]) + val mockEnvironment = mock(classOf[SparkEnv]) + val mockMetricSystem = mock(classOf[MetricsSystem]) + when(mockEnvironment.metricsSystem) thenReturn (mockMetricSystem) + when(mockContext.env) thenReturn (mockEnvironment) + val accs = Map("my-accumulator-1" -> acc1, + "my-accumulator-2" -> acc2) + LongAccumulatorSource.register(mockContext, accs) + val captor = new ArgumentCaptor[AccumulatorSource]() + verify(mockMetricSystem, times(1)).registerSource(captor.capture()) + val source = captor.getValue() + val gauges = source.metricRegistry.getGauges() + assert(gauges.get("my-accumulator-1").getValue() == 123) + assert(gauges.get("my-accumulator-2").getValue() == 456) + } + + test("the double accumulators value propety is checked when the gauge's value is requested") { + val acc1 = new DoubleAccumulator() + acc1.add(123.123) + val acc2 = new DoubleAccumulator() + acc2.add(456.456) + val mockContext = mock(classOf[SparkContext]) + val mockEnvironment = mock(classOf[SparkEnv]) + val mockMetricSystem = mock(classOf[MetricsSystem]) + when(mockEnvironment.metricsSystem) thenReturn (mockMetricSystem) + when(mockContext.env) thenReturn (mockEnvironment) + val accs = Map( + "my-accumulator-1" -> acc1, + "my-accumulator-2" -> acc2) + DoubleAccumulatorSource.register(mockContext, accs) + val captor = new ArgumentCaptor[AccumulatorSource]() + verify(mockMetricSystem, 
times(1)).registerSource(captor.capture()) + val source = captor.getValue() + val gauges = source.metricRegistry.getGauges() + assert(gauges.get("my-accumulator-1").getValue() == 123.123) + assert(gauges.get("my-accumulator-2").getValue() == 456.456) + } +} diff --git a/examples/src/main/scala/org/apache/spark/examples/AccumulatorMetricsTest.scala b/examples/src/main/scala/org/apache/spark/examples/AccumulatorMetricsTest.scala new file mode 100644 index 000000000000..5d9a9a73f12e --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/AccumulatorMetricsTest.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off println +package org.apache.spark.examples + +import org.apache.spark.metrics.source.{DoubleAccumulatorSource, LongAccumulatorSource} +import org.apache.spark.sql.SparkSession + +/** + * Usage: AccumulatorMetricsTest [numElem] + * + * This example shows how to register accumulators against the accumulator source. + * A simple RDD is created, and during the map, the accumulators are incremented. + * + * The only argument, numElem, sets the number elements in the collection to parallize. + * + * The result is output to stdout in the driver with the values of the accumulators. + * For the long accumulator, it should equal numElem the double accumulator should be + * roughly 1.1 x numElem (within double precision.) This example also sets up a + * ConsoleSink (metrics) instance, and so registered codahale metrics (like the + * accumulator source) are reported to stdout as well. 
+ */ +object AccumulatorMetricsTest { + def main(args: Array[String]) { + + val spark = SparkSession + .builder() + .config("spark.metrics.conf.*.sink.console.class", + "org.apache.spark.metrics.sink.ConsoleSink") + .getOrCreate() + + val sc = spark.sparkContext + + val acc = sc.longAccumulator("my-long-metric") + // register the accumulator, the metric system will report as + // [spark.metrics.namespace].[execId|driver].AccumulatorSource.my-long-metric + LongAccumulatorSource.register(sc, List(("my-long-metric" -> acc)).toMap) + + val acc2 = sc.doubleAccumulator("my-double-metric") + // register the accumulator, the metric system will report as + // [spark.metrics.namespace].[execId|driver].AccumulatorSource.my-double-metric + DoubleAccumulatorSource.register(sc, List(("my-double-metric" -> acc2)).toMap) + + val num = if (args.length > 0) args(0).toInt else 1000000 + + val startTime = System.nanoTime + + val accumulatorTest = sc.parallelize(1 to num).foreach(_ => { + acc.add(1) + acc2.add(1.1) + }) + + // Print a footer with test time and accumulator values + println("Test took %.0f milliseconds".format((System.nanoTime - startTime) / 1E6)) + println("Accumulator values:") + println("*** Long accumulator (my-long-metric): " + acc.value) + println("*** Double accumulator (my-double-metric): " + acc2.value) + + spark.stop() + } +} +// scalastyle:on println From 90a810352e94c0b74c19324301e51e8f5bbe98dd Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Sat, 22 Dec 2018 10:32:32 -0600 Subject: [PATCH 0185/1072] [SPARK-25245][DOCS][SS] Explain regarding limiting modification on "spark.sql.shuffle.partitions" for structured streaming ## What changes were proposed in this pull request? This patch adds explanation of `why "spark.sql.shuffle.partitions" keeps unchanged in structured streaming`, which couple of users already wondered and some of them even thought it as a bug. This patch would help other end users to know about such behavior before they find by theirselves and being wondered. ## How was this patch tested? No need to test because this is a simple addition on guide doc with markdown editor. Closes #22238 from HeartSaVioR/SPARK-25245. Lead-authored-by: Jungtaek Lim Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Sean Owen --- docs/structured-streaming-programming-guide.md | 10 ++++++++++ .../scala/org/apache/spark/sql/internal/SQLConf.scala | 8 ++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 32d61dcdb459..e76b53dbb4dc 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -3113,6 +3113,16 @@ See [Input Sources](#input-sources) and [Output Sinks](#output-sinks) sections f # Additional Information +**Notes** + +- Several configurations are not modifiable after the query has run. To change them, discard the checkpoint and start a new query. These configurations include: + - `spark.sql.shuffle.partitions` + - This is due to the physical partitioning of state: state is partitioned via applying hash function to key, hence the number of partitions for state should be unchanged. + - If you want to run fewer tasks for stateful operations, `coalesce` would help with avoiding unnecessary repartitioning. + - After `coalesce`, the number of (reduced) tasks will be kept unless another shuffle happens. 
+ - `spark.sql.streaming.stateStore.providerClass`: To read the previous state of the query properly, the class of state store provider should be unchanged. + - `spark.sql.streaming.multipleWatermarkPolicy`: Modification of this would lead inconsistent watermark value when query contains multiple watermarks, hence the policy should be unchanged. + **Further Reading** - See and run the diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 86e068bf632b..fe445e001935 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -263,7 +263,9 @@ object SQLConf { .createWithDefault(true) val SHUFFLE_PARTITIONS = buildConf("spark.sql.shuffle.partitions") - .doc("The default number of partitions to use when shuffling data for joins or aggregations.") + .doc("The default number of partitions to use when shuffling data for joins or aggregations. " + + "Note: For structured streaming, this configuration cannot be changed between query " + + "restarts from the same checkpoint location.") .intConf .createWithDefault(200) @@ -882,7 +884,9 @@ object SQLConf { .internal() .doc( "The class used to manage state data in stateful streaming queries. This class must " + - "be a subclass of StateStoreProvider, and must have a zero-arg constructor.") + "be a subclass of StateStoreProvider, and must have a zero-arg constructor. " + + "Note: For structured streaming, this configuration cannot be changed between query " + + "restarts from the same checkpoint location.") .stringConf .createWithDefault( "org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider") From a5a24d92bdf6e6a8e33bdc8833bedba033576b4c Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Sat, 22 Dec 2018 10:35:14 -0800 Subject: [PATCH 0186/1072] [SPARK-26402][SQL] Accessing nested fields with different cases in case insensitive mode ## What changes were proposed in this pull request? GetStructField with different optional names should be semantically equal. We will use this as building block to compare the nested fields used in the plans to be optimized by catalyst optimizer. This PR also fixes a bug below that accessing nested fields with different cases in case insensitive mode will result `AnalysisException`. ``` sql("create table t (s struct) using json") sql("select s.I from t group by s.i") ``` which is currently failing ``` org.apache.spark.sql.AnalysisException: expression 'default.t.`s`' is neither present in the group by, nor is it an aggregate function ``` as cloud-fan pointed out. ## How was this patch tested? New tests are added. Closes #23353 from dbtsai/nestedEqual. 
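For reference, the user-facing behaviour being fixed can be reproduced with a small session. Note that the DDL in the description above lost its struct type parameter during extraction; `struct<i: int>` below is the assumed intended schema, so treat this as a sketch rather than a copy of the original reproducer.

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("SPARK-26402-example").master("local[*]").getOrCreate()

// Assumed schema: a struct column s with a single int field i.
spark.sql("create table t (s struct<i: int>) using json")

// With the default case-insensitive resolution, referring to the field as s.I while
// grouping by s.i now resolves instead of failing with AnalysisException.
spark.sql("select s.I from t group by s.i").show()
```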
Lead-authored-by: DB Tsai Co-authored-by: DB Tsai Signed-off-by: Dongjoon Hyun --- .../catalyst/expressions/Canonicalize.scala | 4 ++- .../expressions/CanonicalizeSuite.scala | 29 ++++++++++++++++++ .../BinaryComparisonSimplificationSuite.scala | 30 +++++++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 19 ++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index fe6db8b344d3..4d218b936b3a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -26,6 +26,7 @@ package org.apache.spark.sql.catalyst.expressions * * The following rules are applied: * - Names and nullability hints for [[org.apache.spark.sql.types.DataType]]s are stripped. + * - Names for [[GetStructField]] are stripped. * - Commutative and associative operations ([[Add]] and [[Multiply]]) have their children ordered * by `hashCode`. * - [[EqualTo]] and [[EqualNullSafe]] are reordered by `hashCode`. @@ -37,10 +38,11 @@ object Canonicalize { expressionReorder(ignoreNamesTypes(e)) } - /** Remove names and nullability from types. */ + /** Remove names and nullability from types, and names from `GetStructField`. */ private[expressions] def ignoreNamesTypes(e: Expression): Expression = e match { case a: AttributeReference => AttributeReference("none", a.dataType.asNullable)(exprId = a.exprId) + case GetStructField(child, ordinal, Some(_)) => GetStructField(child, ordinal, None) case _ => e } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index 28e6940f3cca..9802a6e5891b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.Range +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} class CanonicalizeSuite extends SparkFunSuite { @@ -50,4 +51,32 @@ class CanonicalizeSuite extends SparkFunSuite { assert(range.where(arrays1).sameResult(range.where(arrays2))) assert(!range.where(arrays1).sameResult(range.where(arrays3))) } + + test("SPARK-26402: accessing nested fields with different cases in case insensitive mode") { + val expId = NamedExpression.newExprId + val qualifier = Seq.empty[String] + val structType = StructType( + StructField("a", StructType(StructField("b", IntegerType, false) :: Nil), false) :: Nil) + + // GetStructField with different names are semantically equal + val fieldA1 = GetStructField( + AttributeReference("data1", structType, false)(expId, qualifier), + 0, Some("a1")) + val fieldA2 = GetStructField( + AttributeReference("data2", structType, false)(expId, qualifier), + 0, Some("a2")) + assert(fieldA1.semanticEquals(fieldA2)) + + val fieldB1 = GetStructField( + GetStructField( + AttributeReference("data1", structType, false)(expId, qualifier), + 0, Some("a1")), + 0, Some("b1")) + val fieldB2 = GetStructField( + GetStructField( + 
AttributeReference("data2", structType, false)(expId, qualifier), + 0, Some("a2")), + 0, Some("b2")) + assert(fieldB1.semanticEquals(fieldB2)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala index a313681eeb8f..5794691a365a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLite import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper { @@ -92,4 +93,33 @@ class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper val correctAnswer = nonNullableRelation.analyze comparePlans(actual, correctAnswer) } + + test("SPARK-26402: accessing nested fields with different cases in case insensitive mode") { + val expId = NamedExpression.newExprId + val qualifier = Seq.empty[String] + val structType = StructType( + StructField("a", StructType(StructField("b", IntegerType, false) :: Nil), false) :: Nil) + + val fieldA1 = GetStructField( + GetStructField( + AttributeReference("data1", structType, false)(expId, qualifier), + 0, Some("a1")), + 0, Some("b1")) + val fieldA2 = GetStructField( + GetStructField( + AttributeReference("data2", structType, false)(expId, qualifier), + 0, Some("a2")), + 0, Some("b2")) + + // GetStructField with different names are semantically equal; thus, `EqualTo(fieldA1, fieldA2)` + // will be optimized to `TrueLiteral` by `SimplifyBinaryComparison`. 
+ val originalQuery = nonNullableRelation + .where(EqualTo(fieldA1, fieldA2)) + .analyze + + val optimized = Optimize.execute(originalQuery) + val correctAnswer = nonNullableRelation.analyze + + comparePlans(optimized, correctAnswer) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 37a8815350a5..656da9fa0180 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2937,6 +2937,25 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } } + + test("SPARK-26402: accessing nested fields with different cases in case insensitive mode") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val msg = intercept[AnalysisException] { + withTable("t") { + sql("create table t (s struct) using json") + checkAnswer(sql("select s.I from t group by s.i"), Nil) + } + }.message + assert(msg.contains("No such struct field I in i")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + withTable("t") { + sql("create table t (s struct) using json") + checkAnswer(sql("select s.I from t group by s.i"), Nil) + } + } + } } case class Foo(bar: Option[String]) From 1008ab0801c192e8f261001eaaf58a6c9f6e747a Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 24 Dec 2018 10:47:47 +0800 Subject: [PATCH 0187/1072] [SPARK-26178][SPARK-26243][SQL][FOLLOWUP] Replacing SimpleDateFormat by DateTimeFormatter in comments ## What changes were proposed in this pull request? The PRs #23150 and #23196 switched JSON and CSV datasources on new formatter for dates/timestamps which is based on `DateTimeFormatter`. In this PR, I replaced `SimpleDateFormat` by `DateTimeFormatter` to reflect the changes. Closes #23374 from MaxGekk/java-time-docs. Authored-by: Maxim Gekk Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/readwriter.py | 28 +++++++++++-------- python/pyspark/sql/streaming.py | 14 ++++++---- .../apache/spark/sql/DataFrameReader.scala | 12 ++++---- .../apache/spark/sql/DataFrameWriter.scala | 12 ++++---- .../sql/streaming/DataStreamReader.scala | 12 ++++---- 5 files changed, 42 insertions(+), 36 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 7b10512a4329..3da052391a95 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -226,11 +226,12 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, it uses the value specified in ``spark.sql.columnNameOfCorruptRecord``. :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param multiLine: parse one record, which may span multiple lines, per file. 
If None is @@ -406,11 +407,12 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non :param negativeInf: sets the string representation of a negative infinity value. If None is set, it uses the default value, ``Inf``. :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param maxColumns: defines a hard limit of how many columns a record can have. If None is @@ -803,11 +805,12 @@ def json(self, path, mode=None, compression=None, dateFormat=None, timestampForm known case-insensitive shorten names (none, bzip2, gzip, lz4, snappy and deflate). :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param encoding: specifies encoding (charset) of saved json files. If None is set, @@ -904,11 +907,12 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No :param nullValue: sets the string representation of a null value. If None is set, it uses the default value, empty string. :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param ignoreLeadingWhiteSpace: a flag indicating whether or not leading whitespaces from diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index fc23b9d99c34..b981fdc4edc7 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -456,11 +456,12 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, it uses the value specified in ``spark.sql.columnNameOfCorruptRecord``. :param dateFormat: sets the string that indicates a date format. 
Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param multiLine: parse one record, which may span multiple lines, per file. If None is @@ -630,11 +631,12 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non :param negativeInf: sets the string representation of a negative infinity value. If None is set, it uses the default value, ``Inf``. :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param maxColumns: defines a hard limit of how many columns a record can have. If None is diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 9751528654ff..ce8e4c8f5b82 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -375,11 +375,11 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * `spark.sql.columnNameOfCorruptRecord`): allows renaming the new field having malformed string * created by `PERMISSIVE` mode. This overrides `spark.sql.columnNameOfCorruptRecord`.

    * <li>`dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format.
-   * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to
-   * date type.</li>
+   * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`.
+   * This applies to date type.</li>
    * <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that
    * indicates a timestamp format. Custom date formats follow the formats at
-   * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
+   * `java.time.format.DateTimeFormatter`. This applies to timestamp type.</li>
    * <li>`multiLine` (default `false`): parse one record, which may span multiple lines,
    * per file</li>
    * <li>`encoding` (by default it is not set): allows to forcibly set one of standard basic
@@ -585,11 +585,11 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * <li>`negativeInf` (default `-Inf`): sets the string representation of a negative infinity
    * value.</li>
    * <li>`dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format.
-   * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to
-   * date type.</li>
+   * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`.
+   * This applies to date type.</li>
    * <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that
    * indicates a timestamp format. Custom date formats follow the formats at
-   * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
+   * `java.time.format.DateTimeFormatter`. This applies to timestamp type.</li>
    * <li>`maxColumns` (default `20480`): defines a hard limit of how many columns
    * a record can have.</li>
    * <li>`maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index b9c4076994e9..981b3a8fd4ac 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -530,11 +530,11 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
    * one of the known case-insensitive shorten names (`none`, `bzip2`, `gzip`, `lz4`,
    * `snappy` and `deflate`).</li>
    * <li>`dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format.
-   * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to
-   * date type.</li>
+   * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`.
+   * This applies to date type.</li>
    * <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that
    * indicates a timestamp format. Custom date formats follow the formats at
-   * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
+   * `java.time.format.DateTimeFormatter`. This applies to timestamp type.</li>
    * <li>`encoding` (by default it is not set): specifies encoding (charset) of saved json
    * files. If it is not set, the UTF-8 charset will be used.</li>
    * <li>`lineSep` (default `\n`): defines the line separator that should be used for writing.</li>
@@ -649,11 +649,11 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
    * one of the known case-insensitive shorten names (`none`, `bzip2`, `gzip`, `lz4`,
    * `snappy` and `deflate`).</li>
    * <li>`dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format.
-   * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to
-   * date type.</li>
+   * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`.
+   * This applies to date type.</li>
    * <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that
    * indicates a timestamp format. Custom date formats follow the formats at
-   * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
+   * `java.time.format.DateTimeFormatter`. This applies to timestamp type.</li>
    * <li>`ignoreLeadingWhiteSpace` (default `true`): a flag indicating whether or not leading
    * whitespaces from values being written should be skipped.</li>
    * <li>`ignoreTrailingWhiteSpace` (default `true`): a flag indicating defines whether or not
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index 914fa90ae7e1..98589da9552c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -286,11 +286,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
    * `spark.sql.columnNameOfCorruptRecord`): allows renaming the new field having malformed string
    * created by `PERMISSIVE` mode. This overrides `spark.sql.columnNameOfCorruptRecord`.</li>
    * <li>`dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format.
-   * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to
-   * date type.</li>
+   * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`.
+   * This applies to date type.</li>
    * <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that
    * indicates a timestamp format. Custom date formats follow the formats at
-   * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
+   * `java.time.format.DateTimeFormatter`. This applies to timestamp type.</li>
    * <li>`multiLine` (default `false`): parse one record, which may span multiple lines,
    * per file</li>
    * <li>`lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator
@@ -347,11 +347,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
    * <li>`negativeInf` (default `-Inf`): sets the string representation of a negative infinity
    * value.</li>
    * <li>`dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format.
-   * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to
-   * date type.</li>
+   * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`.
+   * This applies to date type.</li>
    * <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that
    * indicates a timestamp format. Custom date formats follow the formats at
-   * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
+   * `java.time.format.DateTimeFormatter`. This applies to timestamp type.</li>
    * <li>`maxColumns` (default `20480`): defines a hard limit of how many columns
    * a record can have.</li>
  • `maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed From 0523f5e378e69f406104fabaf3ebe913de976bdb Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sun, 23 Dec 2018 21:09:44 -0800 Subject: [PATCH 0188/1072] [SPARK-14023][CORE][SQL] Don't reference 'field' in StructField errors for clarity in exceptions ## What changes were proposed in this pull request? Variation of https://github.com/apache/spark/pull/20500 I cheated by not referencing fields or columns at all as this exception propagates in contexts where both would be applicable. ## How was this patch tested? Existing tests Closes #23373 from srowen/SPARK-14023.2. Authored-by: Sean Owen Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/types/StructType.scala | 17 +++++++---------- .../spark/sql/types/StructTypeSuite.scala | 8 ++++---- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index 6e8bbde7787a..e01d7c59cac5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -28,7 +28,6 @@ import org.apache.spark.annotation.Stable import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, LegacyTypeStringParser} import org.apache.spark.sql.catalyst.util.{quoteIdentifier, truncatedString} -import org.apache.spark.util.Utils /** * A [[StructType]] object can be constructed by @@ -57,7 +56,7 @@ import org.apache.spark.util.Utils * * // If this struct does not have a field called "d", it throws an exception. * struct("d") - * // java.lang.IllegalArgumentException: Field "d" does not exist. + * // java.lang.IllegalArgumentException: d does not exist. * // ... * * // Extract multiple StructFields. Field names are provided in a set. @@ -69,7 +68,7 @@ import org.apache.spark.util.Utils * // Any names without matching fields will throw an exception. * // For the case shown below, an exception is thrown due to "d". * struct(Set("b", "c", "d")) - * // java.lang.IllegalArgumentException: Field "d" does not exist. + * // java.lang.IllegalArgumentException: d does not exist. * // ... * }}} * @@ -272,22 +271,21 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru def apply(name: String): StructField = { nameToField.getOrElse(name, throw new IllegalArgumentException( - s"""Field "$name" does not exist. - |Available fields: ${fieldNames.mkString(", ")}""".stripMargin)) + s"$name does not exist. Available: ${fieldNames.mkString(", ")}")) } /** * Returns a [[StructType]] containing [[StructField]]s of the given names, preserving the * original order of fields. * - * @throws IllegalArgumentException if a field cannot be found for any of the given names + * @throws IllegalArgumentException if at least one given field name does not exist */ def apply(names: Set[String]): StructType = { val nonExistFields = names -- fieldNamesSet if (nonExistFields.nonEmpty) { throw new IllegalArgumentException( - s"""Nonexistent field(s): ${nonExistFields.mkString(", ")}. - |Available fields: ${fieldNames.mkString(", ")}""".stripMargin) + s"${nonExistFields.mkString(", ")} do(es) not exist. " + + s"Available: ${fieldNames.mkString(", ")}") } // Preserve the original order of fields. 
StructType(fields.filter(f => names.contains(f.name))) @@ -301,8 +299,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru def fieldIndex(name: String): Int = { nameToIndex.getOrElse(name, throw new IllegalArgumentException( - s"""Field "$name" does not exist. - |Available fields: ${fieldNames.mkString(", ")}""".stripMargin)) + s"$name does not exist. Available: ${fieldNames.mkString(", ")}")) } private[sql] def getFieldIndex(name: String): Option[Int] = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala index 53a78c94aa6f..b4ce26be24de 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala @@ -22,21 +22,21 @@ import org.apache.spark.sql.types.StructType.fromDDL class StructTypeSuite extends SparkFunSuite { - val s = StructType.fromDDL("a INT, b STRING") + private val s = StructType.fromDDL("a INT, b STRING") test("lookup a single missing field should output existing fields") { val e = intercept[IllegalArgumentException](s("c")).getMessage - assert(e.contains("Available fields: a, b")) + assert(e.contains("Available: a, b")) } test("lookup a set of missing fields should output existing fields") { val e = intercept[IllegalArgumentException](s(Set("a", "c"))).getMessage - assert(e.contains("Available fields: a, b")) + assert(e.contains("Available: a, b")) } test("lookup fieldIndex for missing field should output existing fields") { val e = intercept[IllegalArgumentException](s.fieldIndex("c")).getMessage - assert(e.contains("Available fields: a, b")) + assert(e.contains("Available: a, b")) } test("SPARK-24849: toDDL - simple struct") { From 827383a97c11a61661440ff86ce0c3382a2a23b2 Mon Sep 17 00:00:00 2001 From: wangyanlin01 Date: Tue, 25 Dec 2018 15:53:42 +0800 Subject: [PATCH 0189/1072] [SPARK-26426][SQL] fix ExpresionInfo assert error in windows operation system. ## What changes were proposed in this pull request? fix ExpresionInfo assert error in windows operation system, when running unit tests. ## How was this patch tested? unit tests Closes #23363 from yanlin-Lynn/unit-test-windows. Authored-by: wangyanlin01 Signed-off-by: Hyukjin Kwon --- .../apache/spark/sql/catalyst/expressions/ExpressionInfo.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java index ab13ac9cc548..d5a1b77c0ec8 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java @@ -79,7 +79,7 @@ public ExpressionInfo( assert name != null; assert arguments != null; assert examples != null; - assert examples.isEmpty() || examples.startsWith("\n Examples:"); + assert examples.isEmpty() || examples.startsWith(System.lineSeparator() + " Examples:"); assert note != null; assert since != null; From 7c7fccfeb5bc079fede41eb64f57ab6b1b4b9018 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 27 Dec 2018 11:09:50 +0800 Subject: [PATCH 0190/1072] [SPARK-26424][SQL] Use java.time API in date/timestamp expressions ## What changes were proposed in this pull request? 
In the PR, I propose to switch the `DateFormatClass`, `ToUnixTimestamp`, `FromUnixTime`, `UnixTime` on java.time API for parsing/formatting dates and timestamps. The API has been already implemented by the `Timestamp`/`DateFormatter` classes. One of benefit is those classes support parsing timestamps with microsecond precision. Old behaviour can be switched on via SQL config: `spark.sql.legacy.timeParser.enabled` (`false` by default). ## How was this patch tested? It was tested by existing test suites - `DateFunctionsSuite`, `DateExpressionsSuite`, `JsonSuite`, `CsvSuite`, `SQLQueryTestSuite` as well as PySpark tests. Closes #23358 from MaxGekk/new-time-cast. Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Wenchen Fan --- R/pkg/R/functions.R | 8 +- docs/sql-migration-guide-upgrade.md | 1 + python/pyspark/sql/functions.py | 6 +- .../sql/catalyst/csv/CSVInferSchema.scala | 3 +- .../expressions/datetimeExpressions.scala | 82 +++++++++++-------- .../sql/catalyst/json/JsonInferSchema.scala | 3 +- .../sql/catalyst/util/DateFormatter.scala | 8 +- .../util/DateTimeFormatterHelper.scala | 21 +++-- .../sql/catalyst/util/DateTimeUtils.scala | 10 --- .../catalyst/util/TimestampFormatter.scala | 22 ++++- .../catalyst/csv/UnivocityParserSuite.scala | 2 +- .../spark/sql/util/DateFormatterSuite.scala | 7 ++ .../sql/util/TimestampFormatterSuite.scala | 12 +++ .../org/apache/spark/sql/functions.scala | 10 +-- .../apache/spark/sql/DateFunctionsSuite.scala | 2 +- 15 files changed, 122 insertions(+), 75 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index f568a931ae1f..5b3cc0940d9c 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -1723,7 +1723,7 @@ setMethod("radians", #' @details #' \code{to_date}: Converts the column into a DateType. You may optionally specify #' a format according to the rules in: -#' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}. +#' \url{https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html}. #' If the string cannot be parsed according to the specified format (or default), #' the value of the column will be null. #' By default, it follows casting rules to a DateType if the format is omitted @@ -1819,7 +1819,7 @@ setMethod("to_csv", signature(x = "Column"), #' @details #' \code{to_timestamp}: Converts the column into a TimestampType. You may optionally specify #' a format according to the rules in: -#' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}. +#' \url{https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html}. #' If the string cannot be parsed according to the specified format (or default), #' the value of the column will be null. #' By default, it follows casting rules to a TimestampType if the format is omitted @@ -2240,7 +2240,7 @@ setMethod("n", signature(x = "Column"), #' \code{date_format}: Converts a date/timestamp/string to a value of string in the format #' specified by the date format given by the second argument. A pattern could be for instance #' \code{dd.MM.yyyy} and could return a string like '18.03.1993'. All -#' pattern letters of \code{java.text.SimpleDateFormat} can be used. +#' pattern letters of \code{java.time.format.DateTimeFormatter} can be used. #' Note: Use when ever possible specialized functions like \code{year}. These benefit from a #' specialized implementation. 
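(Illustrative aside, not part of the patch: a short Scala sketch of the behaviour described above; `spark` is assumed to be an existing SparkSession and the literals are arbitrary.)

```scala
import org.apache.spark.sql.functions.{col, date_format, to_date}

val df = spark.sql("SELECT timestamp '1993-03-18 10:15:00' AS ts, '2015-07-22 10:00:00' AS s")

// Pattern letters are now interpreted by java.time.format.DateTimeFormatter.
df.select(date_format(col("ts"), "dd.MM.yyyy")).first().getString(0)  // "18.03.1993"

// The new parser is strict: the pattern must consume the whole input, so a
// date-only pattern applied to a full timestamp string yields null.
df.select(to_date(col("s"), "yyyy-MM-dd")).first().get(0)             // null

// Switch back to the SimpleDateFormat-based behaviour of Spark 2.4 and earlier.
spark.conf.set("spark.sql.legacy.timeParser.enabled", "true")
```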
#' @@ -2666,7 +2666,7 @@ setMethod("format_string", signature(format = "character", x = "Column"), #' \code{from_unixtime}: Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) #' to a string representing the timestamp of that moment in the current system time zone in the JVM #' in the given format. -#' See \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{ +#' See \href{https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html}{ #' Customizing Formats} for available options. #' #' @rdname column_datetime_functions diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 1bd3b5ad0e1a..c4d2157de8b6 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -39,6 +39,7 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, JSON datasource and JSON functions like `from_json` convert a bad JSON record to a row with all `null`s in the PERMISSIVE mode when specified schema is `StructType`. Since Spark 3.0, the returned row can contain non-`null` fields if some of JSON column values were parsed and converted to desired types successfully. + - Since Spark 3.0, the `unix_timestamp`, `date_format`, `to_unix_timestamp`, `from_unixtime`, `to_date`, `to_timestamp` functions use java.time API for parsing and formatting dates/timestamps from/to strings by using ISO chronology (https://docs.oracle.com/javase/8/docs/api/java/time/chrono/IsoChronology.html) based on Proleptic Gregorian calendar. In Spark version 2.4 and earlier, java.text.SimpleDateFormat and java.util.GregorianCalendar (hybrid calendar that supports both the Julian and Gregorian calendar systems, see https://docs.oracle.com/javase/7/docs/api/java/util/GregorianCalendar.html) is used for the same purpuse. New implementation supports pattern formats as described here https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html and performs strict checking of its input. For example, the `2015-07-22 10:00:00` timestamp cannot be parse if pattern is `yyyy-MM-dd` because the parser does not consume whole input. Another example is the `31/01/2015 00:00` input cannot be parsed by the `dd/MM/yyyy hh:mm` pattern because `hh` supposes hours in the range `1-12`. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some change in behavior and are illustrated in the table below. diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index d188de39e21c..d2a771e9bb8e 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -874,7 +874,7 @@ def date_format(date, format): format given by the second argument. A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All - pattern letters of the Java class `java.text.SimpleDateFormat` can be used. + pattern letters of the Java class `java.time.format.DateTimeFormatter` can be used. .. note:: Use when ever possible specialized functions like `year`. 
These benefit from a specialized implementation. @@ -1094,7 +1094,7 @@ def to_date(col, format=None): """Converts a :class:`Column` of :class:`pyspark.sql.types.StringType` or :class:`pyspark.sql.types.TimestampType` into :class:`pyspark.sql.types.DateType` using the optionally specified format. Specify formats according to - `SimpleDateFormats `_. + `DateTimeFormatter `_. # noqa By default, it follows casting rules to :class:`pyspark.sql.types.DateType` if the format is omitted (equivalent to ``col.cast("date")``). @@ -1119,7 +1119,7 @@ def to_timestamp(col, format=None): """Converts a :class:`Column` of :class:`pyspark.sql.types.StringType` or :class:`pyspark.sql.types.TimestampType` into :class:`pyspark.sql.types.DateType` using the optionally specified format. Specify formats according to - `SimpleDateFormats `_. + `DateTimeFormatter `_. # noqa By default, it follows casting rules to :class:`pyspark.sql.types.TimestampType` if the format is omitted (equivalent to ``col.cast("timestamp")``). diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 35ade136cc60..4dd41042856d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -27,8 +27,7 @@ import org.apache.spark.sql.types._ class CSVInferSchema(val options: CSVOptions) extends Serializable { - @transient - private lazy val timestampParser = TimestampFormatter( + private val timestampParser = TimestampFormatter( options.timestampFormat, options.timeZone, options.locale) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 45e17ae235a9..73af0a3c5c2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp -import java.text.DateFormat -import java.util.{Calendar, TimeZone} +import java.util.{Calendar, Locale, TimeZone} import scala.util.control.NonFatal @@ -28,7 +27,8 @@ import org.apache.commons.lang3.StringEscapeUtils import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -562,16 +562,17 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti copy(timeZoneId = Option(timeZoneId)) override protected def nullSafeEval(timestamp: Any, format: Any): Any = { - val df = DateTimeUtils.newDateFormat(format.toString, timeZone) - UTF8String.fromString(df.format(new java.util.Date(timestamp.asInstanceOf[Long] / 1000))) + val df = TimestampFormatter(format.toString, timeZone, Locale.US) + UTF8String.fromString(df.format(timestamp.asInstanceOf[Long])) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = 
DateTimeUtils.getClass.getName.stripSuffix("$") + val tf = TimestampFormatter.getClass.getName.stripSuffix("$") val tz = ctx.addReferenceObj("timeZone", timeZone) + val locale = ctx.addReferenceObj("locale", Locale.US) defineCodeGen(ctx, ev, (timestamp, format) => { - s"""UTF8String.fromString($dtu.newDateFormat($format.toString(), $tz) - .format(new java.util.Date($timestamp / 1000)))""" + s"""UTF8String.fromString($tf.apply($format.toString(), $tz, $locale) + .format($timestamp))""" }) } @@ -612,9 +613,10 @@ case class ToUnixTimestamp( } /** - * Converts time string with given pattern. - * (see [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html]) - * to Unix time stamp (in seconds), returns null if fail. + * Converts time string with given pattern to Unix time stamp (in seconds), returns null if fail. + * See [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html] + * if SQL config spark.sql.legacy.timeParser.enabled is set to true otherwise + * [https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html]. * Note that hive Language Manual says it returns 0 if fail, but in fact it returns null. * If the second parameter is missing, use "yyyy-MM-dd HH:mm:ss". * If no parameters provided, the first parameter will be current_timestamp. @@ -663,9 +665,9 @@ abstract class UnixTime override def nullable: Boolean = true private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] - private lazy val formatter: DateFormat = + private lazy val formatter: TimestampFormatter = try { - DateTimeUtils.newDateFormat(constFormat.toString, timeZone) + TimestampFormatter(constFormat.toString, timeZone, Locale.US) } catch { case NonFatal(_) => null } @@ -677,16 +679,16 @@ abstract class UnixTime } else { left.dataType match { case DateType => - DateTimeUtils.daysToMillis(t.asInstanceOf[Int], timeZone) / 1000L + DateTimeUtils.daysToMillis(t.asInstanceOf[Int], timeZone) / MILLIS_PER_SECOND case TimestampType => - t.asInstanceOf[Long] / 1000000L + t.asInstanceOf[Long] / MICROS_PER_SECOND case StringType if right.foldable => if (constFormat == null || formatter == null) { null } else { try { formatter.parse( - t.asInstanceOf[UTF8String].toString).getTime / 1000L + t.asInstanceOf[UTF8String].toString) / MICROS_PER_SECOND } catch { case NonFatal(_) => null } @@ -698,8 +700,8 @@ abstract class UnixTime } else { val formatString = f.asInstanceOf[UTF8String].toString try { - DateTimeUtils.newDateFormat(formatString, timeZone).parse( - t.asInstanceOf[UTF8String].toString).getTime / 1000L + TimestampFormatter(formatString, timeZone, Locale.US).parse( + t.asInstanceOf[UTF8String].toString) / MICROS_PER_SECOND } catch { case NonFatal(_) => null } @@ -712,7 +714,7 @@ abstract class UnixTime val javaType = CodeGenerator.javaType(dataType) left.dataType match { case StringType if right.foldable => - val df = classOf[DateFormat].getName + val df = classOf[TimestampFormatter].getName if (formatter == null) { ExprCode.forNullValue(dataType) } else { @@ -724,24 +726,35 @@ abstract class UnixTime $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; if (!${ev.isNull}) { try { - ${ev.value} = $formatterName.parse(${eval1.value}.toString()).getTime() / 1000L; + ${ev.value} = $formatterName.parse(${eval1.value}.toString()) / 1000000L; + } catch (java.lang.IllegalArgumentException e) { + ${ev.isNull} = true; } catch (java.text.ParseException e) { ${ev.isNull} = true; + } catch (java.time.format.DateTimeParseException e) { + ${ev.isNull} = 
true; + } catch (java.time.DateTimeException e) { + ${ev.isNull} = true; } }""") } case StringType => val tz = ctx.addReferenceObj("timeZone", timeZone) - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + val locale = ctx.addReferenceObj("locale", Locale.US) + val dtu = TimestampFormatter.getClass.getName.stripSuffix("$") nullSafeCodeGen(ctx, ev, (string, format) => { s""" try { - ${ev.value} = $dtu.newDateFormat($format.toString(), $tz) - .parse($string.toString()).getTime() / 1000L; + ${ev.value} = $dtu.apply($format.toString(), $tz, $locale) + .parse($string.toString()) / 1000000L; } catch (java.lang.IllegalArgumentException e) { ${ev.isNull} = true; } catch (java.text.ParseException e) { ${ev.isNull} = true; + } catch (java.time.format.DateTimeParseException e) { + ${ev.isNull} = true; + } catch (java.time.DateTimeException e) { + ${ev.isNull} = true; } """ }) @@ -806,9 +819,9 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ copy(timeZoneId = Option(timeZoneId)) private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] - private lazy val formatter: DateFormat = + private lazy val formatter: TimestampFormatter = try { - DateTimeUtils.newDateFormat(constFormat.toString, timeZone) + TimestampFormatter(constFormat.toString, timeZone, Locale.US) } catch { case NonFatal(_) => null } @@ -823,8 +836,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ null } else { try { - UTF8String.fromString(formatter.format( - new java.util.Date(time.asInstanceOf[Long] * 1000L))) + UTF8String.fromString(formatter.format(time.asInstanceOf[Long] * MICROS_PER_SECOND)) } catch { case NonFatal(_) => null } @@ -835,8 +847,8 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ null } else { try { - UTF8String.fromString(DateTimeUtils.newDateFormat(f.toString, timeZone) - .format(new java.util.Date(time.asInstanceOf[Long] * 1000L))) + UTF8String.fromString(TimestampFormatter(f.toString, timeZone, Locale.US) + .format(time.asInstanceOf[Long] * MICROS_PER_SECOND)) } catch { case NonFatal(_) => null } @@ -846,7 +858,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val df = classOf[DateFormat].getName + val df = classOf[TimestampFormatter].getName if (format.foldable) { if (formatter == null) { ExprCode.forNullValue(StringType) @@ -859,8 +871,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ ${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; if (!${ev.isNull}) { try { - ${ev.value} = UTF8String.fromString($formatterName.format( - new java.util.Date(${t.value} * 1000L))); + ${ev.value} = UTF8String.fromString($formatterName.format(${t.value} * 1000000L)); } catch (java.lang.IllegalArgumentException e) { ${ev.isNull} = true; } @@ -868,12 +879,13 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ } } else { val tz = ctx.addReferenceObj("timeZone", timeZone) - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + val locale = ctx.addReferenceObj("locale", Locale.US) + val tf = TimestampFormatter.getClass.getName.stripSuffix("$") nullSafeCodeGen(ctx, ev, (seconds, f) => { s""" try { - ${ev.value} = UTF8String.fromString($dtu.newDateFormat($f.toString(), $tz).format( - new java.util.Date($seconds * 1000L))); + ${ev.value} = UTF8String.fromString($tf.apply($f.toString(), 
$tz, $locale). + format($seconds * 1000000L)); } catch (java.lang.IllegalArgumentException e) { ${ev.isNull} = true; }""" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index d1bc00c08c1c..3203e626ea40 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -37,8 +37,7 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { private val decimalParser = ExprUtils.getDecimalParser(options.locale) - @transient - private lazy val timestampFormatter = TimestampFormatter( + private val timestampFormatter = TimestampFormatter( options.timestampFormat, options.timeZone, options.locale) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index 9e8d51cc65f0..b4c99674fc1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -26,7 +26,7 @@ import org.apache.commons.lang3.time.FastDateFormat import org.apache.spark.sql.internal.SQLConf -sealed trait DateFormatter { +sealed trait DateFormatter extends Serializable { def parse(s: String): Int // returns days since epoch def format(days: Int): String } @@ -35,7 +35,8 @@ class Iso8601DateFormatter( pattern: String, locale: Locale) extends DateFormatter with DateTimeFormatterHelper { - private val formatter = buildFormatter(pattern, locale) + @transient + private lazy val formatter = buildFormatter(pattern, locale) private val UTC = ZoneId.of("UTC") private def toInstant(s: String): Instant = { @@ -56,7 +57,8 @@ class Iso8601DateFormatter( } class LegacyDateFormatter(pattern: String, locale: Locale) extends DateFormatter { - private val format = FastDateFormat.getInstance(pattern, locale) + @transient + private lazy val format = FastDateFormat.getInstance(pattern, locale) override def parse(s: String): Int = { val milliseconds = format.parse(s).getTime diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index b85101d38d9e..91cc57e0bb01 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -17,27 +17,36 @@ package org.apache.spark.sql.catalyst.util -import java.time.{Instant, LocalDateTime, ZonedDateTime, ZoneId} -import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder} -import java.time.temporal.{ChronoField, TemporalAccessor} +import java.time._ +import java.time.chrono.IsoChronology +import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle} +import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} import java.util.Locale trait DateTimeFormatterHelper { protected def buildFormatter(pattern: String, locale: Locale): DateTimeFormatter = { new DateTimeFormatterBuilder() + .parseCaseInsensitive() .appendPattern(pattern) - .parseDefaulting(ChronoField.YEAR_OF_ERA, 1970) + .parseDefaulting(ChronoField.ERA, 1) .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) 
.parseDefaulting(ChronoField.DAY_OF_MONTH, 1) - .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) .toFormatter(locale) + .withChronology(IsoChronology.INSTANCE) + .withResolverStyle(ResolverStyle.STRICT) } protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, zoneId: ZoneId): Instant = { - val localDateTime = LocalDateTime.from(temporalAccessor) + val localTime = if (temporalAccessor.query(TemporalQueries.localTime) == null) { + LocalTime.ofNanoOfDay(0) + } else { + LocalTime.from(temporalAccessor) + } + val localDate = LocalDate.from(temporalAccessor) + val localDateTime = LocalDateTime.of(localDate, localTime) val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId) Instant.from(zonedDateTime) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index c6dfdbf2505b..3e5e1fbc2b36 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -111,16 +111,6 @@ object DateTimeUtils { computedTimeZones.computeIfAbsent(timeZoneId, computeTimeZone) } - def newDateFormat(formatString: String, timeZone: TimeZone): DateFormat = { - val sdf = new SimpleDateFormat(formatString, Locale.US) - sdf.setTimeZone(timeZone) - // Enable strict parsing, if the input date/format is invalid, it will throw an exception. - // e.g. to parse invalid date '2016-13-12', or '2016-01-12' with invalid format 'yyyy-aa-dd', - // an exception will be throwed. - sdf.setLenient(false) - sdf - } - // we should use the exact day as Int, for example, (year, month, day) -> day def millisToDays(millisUtc: Long): SQLDate = { millisToDays(millisUtc, defaultTimeZone()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index eb1303303463..b67b2d7cc3c5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -17,7 +17,9 @@ package org.apache.spark.sql.catalyst.util +import java.text.ParseException import java.time._ +import java.time.format.DateTimeParseException import java.time.temporal.TemporalQueries import java.util.{Locale, TimeZone} @@ -27,7 +29,19 @@ import org.apache.commons.lang3.time.FastDateFormat import org.apache.spark.sql.internal.SQLConf -sealed trait TimestampFormatter { +sealed trait TimestampFormatter extends Serializable { + /** + * Parses a timestamp in a string and converts it to microseconds. + * + * @param s - string with timestamp to parse + * @return microseconds since epoch. 
+ * @throws ParseException can be thrown by legacy parser + * @throws DateTimeParseException can be thrown by new parser + * @throws DateTimeException unable to obtain local date or time + */ + @throws(classOf[ParseException]) + @throws(classOf[DateTimeParseException]) + @throws(classOf[DateTimeException]) def parse(s: String): Long // returns microseconds since epoch def format(us: Long): String } @@ -36,7 +50,8 @@ class Iso8601TimestampFormatter( pattern: String, timeZone: TimeZone, locale: Locale) extends TimestampFormatter with DateTimeFormatterHelper { - private val formatter = buildFormatter(pattern, locale) + @transient + private lazy val formatter = buildFormatter(pattern, locale) private def toInstant(s: String): Instant = { val temporalAccessor = formatter.parse(s) @@ -68,7 +83,8 @@ class LegacyTimestampFormatter( pattern: String, timeZone: TimeZone, locale: Locale) extends TimestampFormatter { - private val format = FastDateFormat.getInstance(pattern, timeZone, locale) + @transient + private lazy val format = FastDateFormat.getInstance(pattern, timeZone, locale) protected def toMillis(s: String): Long = format.parse(s).getTime diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala index 2d0b0d3033a9..4ae61bc61255 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala @@ -112,7 +112,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { assert(parser.makeConverter("_1", BooleanType).apply("true") == true) var timestampsOptions = - new CSVOptions(Map("timestampFormat" -> "dd/MM/yyyy hh:mm"), false, "GMT") + new CSVOptions(Map("timestampFormat" -> "dd/MM/yyyy HH:mm"), false, "GMT") parser = new UnivocityParser(StructType(Seq.empty), timestampsOptions) val customTimestamp = "31/01/2015 00:00" var format = FastDateFormat.getInstance( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala index 019615b81101..2dc55e0e1f63 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.util +import java.time.LocalDate import java.util.Locale import org.apache.spark.SparkFunSuite @@ -89,4 +90,10 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { } } } + + test("parsing date without explicit day") { + val formatter = DateFormatter("yyyy MMM", Locale.US) + val daysSinceEpoch = formatter.parse("2018 Dec") + assert(daysSinceEpoch === LocalDate.of(2018, 12, 1).toEpochDay) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala index c110ffa01f73..edccbb2a7f5d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala @@ -17,7 +17,9 @@ package org.apache.spark.sql.util +import java.time.{LocalDateTime, ZoneOffset} import java.util.{Locale, TimeZone} +import java.util.concurrent.TimeUnit import org.apache.spark.SparkFunSuite import 
org.apache.spark.sql.catalyst.plans.SQLHelper @@ -106,4 +108,14 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper { } } } + + test(" case insensitive parsing of am and pm") { + val formatter = TimestampFormatter( + "yyyy MMM dd hh:mm:ss a", + TimeZone.getTimeZone("UTC"), + Locale.US) + val micros = formatter.parse("2009 Mar 20 11:30:01 am") + assert(micros === TimeUnit.SECONDS.toMicros( + LocalDateTime.of(2009, 3, 20, 11, 30, 1).toEpochSecond(ZoneOffset.UTC))) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 33186f778d86..645452553e6a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2578,7 +2578,7 @@ object functions { * Converts a date/timestamp/string to a value of string in the format specified by the date * format given by the second argument. * - * See [[java.text.SimpleDateFormat]] for valid date and time format patterns + * See [[java.time.format.DateTimeFormatter]] for valid date and time format patterns * * @param dateExpr A date, timestamp or string. If a string, the data must be in a format that * can be cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` @@ -2811,7 +2811,7 @@ object functions { * representing the timestamp of that moment in the current system time zone in the given * format. * - * See [[java.text.SimpleDateFormat]] for valid date and time format patterns + * See [[java.time.format.DateTimeFormatter]] for valid date and time format patterns * * @param ut A number of a type that is castable to a long, such as string or integer. Can be * negative for timestamps before the unix epoch @@ -2855,7 +2855,7 @@ object functions { /** * Converts time string with given pattern to Unix timestamp (in seconds). * - * See [[java.text.SimpleDateFormat]] for valid date and time format patterns + * See [[java.time.format.DateTimeFormatter]] for valid date and time format patterns * * @param s A date, timestamp or string. If a string, the data must be in a format that can be * cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` @@ -2883,7 +2883,7 @@ object functions { /** * Converts time string with the given pattern to timestamp. * - * See [[java.text.SimpleDateFormat]] for valid date and time format patterns + * See [[java.time.format.DateTimeFormatter]] for valid date and time format patterns * * @param s A date, timestamp or string. If a string, the data must be in a format that can be * cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` @@ -2908,7 +2908,7 @@ object functions { /** * Converts the column into a `DateType` with a specified format * - * See [[java.text.SimpleDateFormat]] for valid date and time format patterns + * See [[java.time.format.DateTimeFormatter]] for valid date and time format patterns * * @param e A date, timestamp or string. 
If a string, the data must be in a format that can be * cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index c4ec7150c407..62bb72dd6ea2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -405,7 +405,7 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext { Row(Date.valueOf("2014-12-31")))) checkAnswer( df.select(to_date(col("s"), "yyyy-MM-dd")), - Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null))) + Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null))) // now switch format checkAnswer( From f89cdec8b9a9fcc95ba7458869b4ba9d038560f9 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 27 Dec 2018 16:03:14 +0800 Subject: [PATCH 0191/1072] [SPARK-26435][SQL] Support creating partitioned table using Hive CTAS by specifying partition column names ## What changes were proposed in this pull request? Spark SQL doesn't support creating partitioned table using Hive CTAS in SQL syntax. However it is supported by using DataFrameWriter API. ```scala val df = Seq(("a", 1)).toDF("part", "id") df.write.format("hive").partitionBy("part").saveAsTable("t") ``` Hive begins to support this syntax in newer version: https://issues.apache.org/jira/browse/HIVE-20241: ``` CREATE TABLE t PARTITIONED BY (part) AS SELECT 1 as id, "a" as part ``` This patch adds this support to SQL syntax. ## How was this patch tested? Added tests. Closes #23376 from viirya/hive-ctas-partitioned-table. Authored-by: Liang-Chi Hsieh Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 3 +- .../spark/sql/execution/SparkSqlParser.scala | 33 ++++++++----- .../sql/hive/execution/HiveDDLSuite.scala | 48 ++++++++++++++++++- .../sql/hive/execution/SQLQuerySuite.scala | 4 +- 4 files changed, 71 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 5e732edb17ba..b39681d886c5 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -88,7 +88,8 @@ statement (AS? query)? #createTable | createTableHeader ('(' columns=colTypeList ')')? ((COMMENT comment=STRING) | - (PARTITIONED BY '(' partitionColumns=colTypeList ')') | + (PARTITIONED BY '(' partitionColumns=colTypeList ')' | + PARTITIONED BY partitionColumnNames=identifierList) | bucketSpec | skewSpec | rowFormat | diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 364efea52830..8deb55b00a9d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1196,33 +1196,40 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { selectQuery match { case Some(q) => - // Hive does not allow to use a CTAS statement to create a partitioned table. - if (tableDesc.partitionColumnNames.nonEmpty) { - val errorMessage = "A Create Table As Select (CTAS) statement is not allowed to " + - "create a partitioned table using Hive's file formats. 
" + - "Please use the syntax of \"CREATE TABLE tableName USING dataSource " + - "OPTIONS (...) PARTITIONED BY ...\" to create a partitioned table through a " + - "CTAS statement." - operationNotAllowed(errorMessage, ctx) - } - // Don't allow explicit specification of schema for CTAS. - if (schema.nonEmpty) { + if (dataCols.nonEmpty) { operationNotAllowed( "Schema may not be specified in a Create Table As Select (CTAS) statement", ctx) } + // When creating partitioned table with CTAS statement, we can't specify data type for the + // partition columns. + if (partitionCols.nonEmpty) { + val errorMessage = "Create Partitioned Table As Select cannot specify data type for " + + "the partition columns of the target table." + operationNotAllowed(errorMessage, ctx) + } + + // Hive CTAS supports dynamic partition by specifying partition column names. + val partitionColumnNames = + Option(ctx.partitionColumnNames) + .map(visitIdentifierList(_).toArray) + .getOrElse(Array.empty[String]) + + val tableDescWithPartitionColNames = + tableDesc.copy(partitionColumnNames = partitionColumnNames) + val hasStorageProperties = (ctx.createFileFormat.size != 0) || (ctx.rowFormat.size != 0) if (conf.convertCTAS && !hasStorageProperties) { // At here, both rowStorage.serdeProperties and fileStorage.serdeProperties // are empty Maps. - val newTableDesc = tableDesc.copy( + val newTableDesc = tableDescWithPartitionColNames.copy( storage = CatalogStorageFormat.empty.copy(locationUri = locUri), provider = Some(conf.defaultDataSourceName)) CreateTable(newTableDesc, mode, Some(q)) } else { - CreateTable(tableDesc, mode, Some(q)) + CreateTable(tableDescWithPartitionColNames, mode, Some(q)) } case None => CreateTable(tableDesc, mode, None) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index fd38944a5dd2..6abdc4054cb0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.hive.execution import java.io.File import java.net.URI -import java.util.Date import scala.language.existentials @@ -33,6 +32,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils} import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.HiveExternalCatalog @@ -2370,4 +2370,50 @@ class HiveDDLSuite )) } } + + test("Hive CTAS can't create partitioned table by specifying schema") { + val err1 = intercept[ParseException] { + spark.sql( + s""" + |CREATE TABLE t (a int) + |PARTITIONED BY (b string) + |STORED AS parquet + |AS SELECT 1 as a, "a" as b + """.stripMargin) + }.getMessage + assert(err1.contains("Schema may not be specified in a Create Table As Select " + + "(CTAS) statement")) + + val err2 = intercept[ParseException] { + spark.sql( + s""" + |CREATE TABLE t + |PARTITIONED BY (b string) + |STORED AS parquet + |AS SELECT 1 as a, "a" as b + """.stripMargin) + }.getMessage + assert(err2.contains("Create Partitioned Table As Select cannot specify data type for " + + "the partition columns of the target 
table")) + } + + test("Hive CTAS with dynamic partition") { + Seq("orc", "parquet").foreach { format => + withTable("t") { + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + spark.sql( + s""" + |CREATE TABLE t + |PARTITIONED BY (b) + |STORED AS $format + |AS SELECT 1 as a, "a" as b + """.stripMargin) + checkAnswer(spark.table("t"), Row(1, "a")) + + assert(spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + .partitionColumnNames === Seq("b")) + } + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 6acf44606cbb..70efad103d13 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -692,8 +692,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { |AS SELECT key, value FROM mytable1 """.stripMargin) }.getMessage - assert(e.contains("A Create Table As Select (CTAS) statement is not allowed to " + - "create a partitioned table using Hive's file formats")) + assert(e.contains("Create Partitioned Table As Select cannot specify data type for " + + "the partition columns of the target table")) } } } From a1c1dd3484a4dcd7c38fe256e69dbaaaf10d1a92 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 27 Dec 2018 11:13:16 +0100 Subject: [PATCH 0192/1072] [SPARK-26191][SQL] Control truncation of Spark plans via maxFields parameter ## What changes were proposed in this pull request? In the PR, I propose to add `maxFields` parameter to all functions involved in creation of textual representation of spark plans such as `simpleString` and `verboseString`. New parameter restricts number of fields converted to truncated strings. Any elements beyond the limit will be dropped and replaced by a `"... N more fields"` placeholder. The threshold is bumped up to `Int.MaxValue` for `toFile()`. ## How was this patch tested? Added a test to `QueryExecutionSuite` which checks `maxFields` impacts on number of truncated fields in `LocalRelation`. Closes #23159 from MaxGekk/to-file-max-fields. 
Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Herman van Hovell --- .../sql/catalyst/analysis/Analyzer.scala | 4 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 8 ++- .../sql/catalyst/analysis/TypeCoercion.scala | 4 +- .../catalyst/encoders/ExpressionEncoder.scala | 8 ++- .../sql/catalyst/expressions/Expression.scala | 6 +- .../expressions/codegen/javaCode.scala | 2 +- .../sql/catalyst/expressions/generators.scala | 3 +- .../expressions/higherOrderFunctions.scala | 4 +- .../spark/sql/catalyst/expressions/misc.scala | 5 +- .../expressions/namedExpressions.scala | 4 +- .../spark/sql/catalyst/plans/QueryPlan.scala | 4 +- .../catalyst/plans/logical/LogicalPlan.scala | 4 +- .../plans/logical/basicLogicalOperators.scala | 8 +-- .../spark/sql/catalyst/trees/TreeNode.scala | 56 +++++++++++-------- .../spark/sql/catalyst/util/package.scala | 10 ++-- .../apache/spark/sql/types/StructType.scala | 6 +- .../aggregate/PercentileSuite.scala | 2 +- .../org/apache/spark/sql/util/UtilSuite.scala | 2 +- .../sql/execution/DataSourceScanExec.scala | 12 ++-- .../spark/sql/execution/ExistingRDD.scala | 6 +- .../spark/sql/execution/QueryExecution.scala | 21 ++++--- .../spark/sql/execution/SparkPlanInfo.scala | 6 +- .../sql/execution/WholeStageCodegenExec.scala | 28 ++++++++-- .../aggregate/HashAggregateExec.scala | 12 ++-- .../aggregate/ObjectHashAggregateExec.scala | 12 ++-- .../aggregate/SortAggregateExec.scala | 12 ++-- .../execution/basicPhysicalOperators.scala | 4 +- .../execution/columnar/InMemoryRelation.scala | 4 +- .../datasources/LogicalRelation.scala | 4 +- .../SaveIntoDataSourceCommand.scala | 2 +- .../spark/sql/execution/datasources/ddl.scala | 2 +- .../datasources/v2/DataSourceV2Relation.scala | 8 ++- .../datasources/v2/DataSourceV2ScanExec.scala | 4 +- .../v2/DataSourceV2StreamingScanExec.scala | 2 +- .../v2/DataSourceV2StringFormat.scala | 6 +- .../spark/sql/execution/debug/package.scala | 3 +- .../apache/spark/sql/execution/limit.scala | 6 +- .../streaming/MicroBatchExecution.scala | 6 +- .../continuous/ContinuousExecution.scala | 7 ++- .../sql/execution/streaming/memory.scala | 5 +- .../apache/spark/sql/execution/subquery.scala | 2 +- .../sql/execution/QueryExecutionSuite.scala | 13 +++++ .../CreateHiveTableAsSelectCommand.scala | 2 +- 43 files changed, 203 insertions(+), 126 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 777053168a05..198645d875c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -979,7 +979,7 @@ class Analyzer( a.mapExpressions(resolveExpressionTopDown(_, appendColumns)) case q: LogicalPlan => - logTrace(s"Attempting to resolve ${q.simpleString}") + logTrace(s"Attempting to resolve ${q.simpleString(SQLConf.get.maxToStringFields)}") q.mapExpressions(resolveExpressionTopDown(_, q)) } @@ -1777,7 +1777,7 @@ class Analyzer( case p if p.expressions.exists(hasGenerator) => throw new AnalysisException("Generators are not supported outside the SELECT clause, but " + - "got: " + p.simpleString) + "got: " + p.simpleString(SQLConf.get.maxToStringFields)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 88d41e882440..c28a97839fe4 
100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** @@ -303,7 +304,7 @@ trait CheckAnalysis extends PredicateHelper { val missingAttributes = o.missingInput.mkString(",") val input = o.inputSet.mkString(",") val msgForMissingAttributes = s"Resolved attribute(s) $missingAttributes missing " + - s"from $input in operator ${operator.simpleString}." + s"from $input in operator ${operator.simpleString(SQLConf.get.maxToStringFields)}." val resolver = plan.conf.resolver val attrsWithSameName = o.missingInput.filter { missing => @@ -368,7 +369,7 @@ trait CheckAnalysis extends PredicateHelper { s"""nondeterministic expressions are only allowed in |Project, Filter, Aggregate or Window, found: | ${o.expressions.map(_.sql).mkString(",")} - |in operator ${operator.simpleString} + |in operator ${operator.simpleString(SQLConf.get.maxToStringFields)} """.stripMargin) case _: UnresolvedHint => @@ -380,7 +381,8 @@ trait CheckAnalysis extends PredicateHelper { } extendedCheckRules.foreach(_(plan)) plan.foreachUp { - case o if !o.resolved => failAnalysis(s"unresolved operator ${o.simpleString}") + case o if !o.resolved => + failAnalysis(s"unresolved operator ${o.simpleString(SQLConf.get.maxToStringFields)}") case _ => } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 1706b3eece6d..b19aa50ba215 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -1069,8 +1069,8 @@ trait TypeCoercionRule extends Rule[LogicalPlan] with Logging { // Leave the same if the dataTypes match. 
case Some(newType) if a.dataType == newType.dataType => a case Some(newType) => - logDebug( - s"Promoting $a from ${a.dataType} to ${newType.dataType} in ${q.simpleString}") + logDebug(s"Promoting $a from ${a.dataType} to ${newType.dataType} in " + + s" ${q.simpleString(SQLConf.get.maxToStringFields)}") newType } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index fbf0bd68b958..da5c1fd0feb0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateSafeProjection import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, InitializeJavaBean, Invoke, NewInstance} import org.apache.spark.sql.catalyst.optimizer.SimplifyCasts import org.apache.spark.sql.catalyst.plans.logical.{CatalystSerde, DeserializeToObject, LocalRelation} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ObjectType, StringType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -323,8 +324,8 @@ case class ExpressionEncoder[T]( extractProjection(inputRow) } catch { case e: Exception => - throw new RuntimeException( - s"Error while encoding: $e\n${serializer.map(_.simpleString).mkString("\n")}", e) + throw new RuntimeException(s"Error while encoding: $e\n" + + s"${serializer.map(_.simpleString(SQLConf.get.maxToStringFields)).mkString("\n")}", e) } /** @@ -336,7 +337,8 @@ case class ExpressionEncoder[T]( constructProjection(row).get(0, ObjectType(clsTag.runtimeClass)).asInstanceOf[T] } catch { case e: Exception => - throw new RuntimeException(s"Error while decoding: $e\n${deserializer.simpleString}", e) + throw new RuntimeException(s"Error while decoding: $e\n" + + s"${deserializer.simpleString(SQLConf.get.maxToStringFields)}", e) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index c89c2272be75..d5d119543da7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -259,12 +259,12 @@ abstract class Expression extends TreeNode[Expression] { // Marks this as final, Expression.verboseString should never be called, and thus shouldn't be // overridden by concrete classes. - final override def verboseString: String = simpleString + final override def verboseString(maxFields: Int): String = simpleString(maxFields) - override def simpleString: String = toString + override def simpleString(maxFields: Int): String = toString override def toString: String = prettyName + truncatedString( - flatArguments.toSeq, "(", ", ", ")") + flatArguments.toSeq, "(", ", ", ")", SQLConf.get.maxToStringFields) /** * Returns SQL representation of this expression. 
For expressions extending [[NonSQLExpression]], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala index 17d4a0dc4e88..17fff64a1b7d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala @@ -197,7 +197,7 @@ trait Block extends TreeNode[Block] with JavaCode { case _ => code"$this\n$other" } - override def verboseString: String = toString + override def verboseString(maxFields: Int): String = toString } object Block { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index 9c74fdf6c9a1..6b6da1c8b414 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** @@ -101,7 +102,7 @@ case class UserDefinedGenerator( inputRow = new InterpretedProjection(children) convertToScala = { val inputSchema = StructType(children.map { e => - StructField(e.simpleString, e.dataType, nullable = true) + StructField(e.simpleString(SQLConf.get.maxToStringFields), e.dataType, nullable = true) }) CatalystTypeConverters.createToScalaConverter(inputSchema) }.asInstanceOf[InternalRow => Row] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 7141b6e99638..e6cc11d1ad28 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -76,7 +76,9 @@ case class NamedLambdaVariable( override def toString: String = s"lambda $name#${exprId.id}$typeSuffix" - override def simpleString: String = s"lambda $name#${exprId.id}: ${dataType.simpleString}" + override def simpleString(maxFields: Int): String = { + s"lambda $name#${exprId.id}: ${dataType.simpleString(maxFields)}" + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 0cdeda9b1051..1f1decc45a3f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.RandomUUIDGenerator +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -40,7 +41,7 @@ case class PrintToStderr(child: Expression) extends 
UnaryExpression { input } - private val outputPrefix = s"Result of ${child.simpleString} is " + private val outputPrefix = s"Result of ${child.simpleString(SQLConf.get.maxToStringFields)} is " override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val outputPrefixField = ctx.addReferenceObj("outputPrefix", outputPrefix) @@ -72,7 +73,7 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa override def prettyName: String = "assert_true" - private val errMsg = s"'${child.simpleString}' is not true!" + private val errMsg = s"'${child.simpleString(SQLConf.get.maxToStringFields)}' is not true!" override def eval(input: InternalRow) : Any = { val v = child.eval(input) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 02b48f9e30f2..131459bf27bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -330,7 +330,9 @@ case class AttributeReference( // Since the expression id is not in the first constructor it is missing from the default // tree string. - override def simpleString: String = s"$name#${exprId.id}: ${dataType.simpleString}" + override def simpleString(maxFields: Int): String = { + s"$name#${exprId.id}: ${dataType.simpleString(maxFields)}" + } override def sql: String = { val qualifierPrefix = if (qualifier.nonEmpty) qualifier.mkString(".") + "." else "" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index ca0cea6ba7de..125181fb213f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -172,9 +172,9 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT */ protected def statePrefix = if (missingInput.nonEmpty && children.nonEmpty) "!" else "" - override def simpleString: String = statePrefix + super.simpleString + override def simpleString(maxFields: Int): String = statePrefix + super.simpleString(maxFields) - override def verboseString: String = simpleString + override def verboseString(maxFields: Int): String = simpleString(maxFields) /** * All the subqueries of current plan. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 3ad2ee692361..51e0f4b4c84d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -36,8 +36,8 @@ abstract class LogicalPlan /** Returns true if this subtree has data from a streaming data source. 
*/ def isStreaming: Boolean = children.exists(_.isStreaming == true) - override def verboseStringWithSuffix: String = { - super.verboseString + statsCache.map(", " + _.toString).getOrElse("") + override def verboseStringWithSuffix(maxFields: Int): String = { + super.verboseString(maxFields) + statsCache.map(", " + _.toString).getOrElse("") } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index a26ec4eed864..d8b3a4af4f7b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -468,7 +468,7 @@ case class View( override def newInstance(): LogicalPlan = copy(output = output.map(_.newInstance())) - override def simpleString: String = { + override def simpleString(maxFields: Int): String = { s"View (${desc.identifier}, ${output.mkString("[", ",", "]")})" } } @@ -484,8 +484,8 @@ case class View( case class With(child: LogicalPlan, cteRelations: Seq[(String, SubqueryAlias)]) extends UnaryNode { override def output: Seq[Attribute] = child.output - override def simpleString: String = { - val cteAliases = truncatedString(cteRelations.map(_._1), "[", ", ", "]") + override def simpleString(maxFields: Int): String = { + val cteAliases = truncatedString(cteRelations.map(_._1), "[", ", ", "]", maxFields) s"CTE $cteAliases" } @@ -557,7 +557,7 @@ case class Range( override def newInstance(): Range = copy(output = output.map(_.newInstance())) - override def simpleString: String = { + override def simpleString(maxFields: Int): String = { s"Range ($start, $end, step=$step, splits=$numSlices)" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 2e9f9f53e94a..21e59bbd283e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode, Partitioning} import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel @@ -433,17 +434,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { private lazy val allChildren: Set[TreeNode[_]] = (children ++ innerChildren).toSet[TreeNode[_]] /** Returns a string representing the arguments to this node, minus any children */ - def argString: String = stringArgs.flatMap { + def argString(maxFields: Int): String = stringArgs.flatMap { case tn: TreeNode[_] if allChildren.contains(tn) => Nil case Some(tn: TreeNode[_]) if allChildren.contains(tn) => Nil - case Some(tn: TreeNode[_]) => tn.simpleString :: Nil - case tn: TreeNode[_] => tn.simpleString :: Nil + case Some(tn: TreeNode[_]) => tn.simpleString(maxFields) :: Nil + case tn: TreeNode[_] => tn.simpleString(maxFields) :: Nil case seq: Seq[Any] if seq.toSet.subsetOf(allChildren.asInstanceOf[Set[Any]]) => Nil case iter: Iterable[_] if iter.isEmpty => Nil - case seq: Seq[_] => truncatedString(seq, "[", ", ", "]") :: Nil - case set: 
Set[_] => truncatedString(set.toSeq, "{", ", ", "}") :: Nil + case seq: Seq[_] => truncatedString(seq, "[", ", ", "]", maxFields) :: Nil + case set: Set[_] => truncatedString(set.toSeq, "{", ", ", "}", maxFields) :: Nil case array: Array[_] if array.isEmpty => Nil - case array: Array[_] => truncatedString(array, "[", ", ", "]") :: Nil + case array: Array[_] => truncatedString(array, "[", ", ", "]", maxFields) :: Nil case null => Nil case None => Nil case Some(null) => Nil @@ -456,24 +457,33 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { case other => other :: Nil }.mkString(", ") - /** ONE line description of this node. */ - def simpleString: String = s"$nodeName $argString".trim + /** + * ONE line description of this node. + * @param maxFields Maximum number of fields that will be converted to strings. + * Any elements beyond the limit will be dropped. + */ + def simpleString(maxFields: Int): String = { + s"$nodeName ${argString(maxFields)}".trim + } /** ONE line description of this node with more information */ - def verboseString: String + def verboseString(maxFields: Int): String /** ONE line description of this node with some suffix information */ - def verboseStringWithSuffix: String = verboseString + def verboseStringWithSuffix(maxFields: Int): String = verboseString(maxFields) override def toString: String = treeString /** Returns a string representation of the nodes in this tree */ def treeString: String = treeString(verbose = true) - def treeString(verbose: Boolean, addSuffix: Boolean = false): String = { + def treeString( + verbose: Boolean, + addSuffix: Boolean = false, + maxFields: Int = SQLConf.get.maxToStringFields): String = { val writer = new StringBuilderWriter() try { - treeString(writer, verbose, addSuffix) + treeString(writer, verbose, addSuffix, maxFields) writer.toString } finally { writer.close() @@ -483,8 +493,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { def treeString( writer: Writer, verbose: Boolean, - addSuffix: Boolean): Unit = { - generateTreeString(0, Nil, writer, verbose, "", addSuffix) + addSuffix: Boolean, + maxFields: Int): Unit = { + generateTreeString(0, Nil, writer, verbose, "", addSuffix, maxFields) } /** @@ -550,7 +561,8 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { writer: Writer, verbose: Boolean, prefix: String = "", - addSuffix: Boolean = false): Unit = { + addSuffix: Boolean = false, + maxFields: Int): Unit = { if (depth > 0) { lastChildren.init.foreach { isLast => @@ -560,9 +572,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { } val str = if (verbose) { - if (addSuffix) verboseStringWithSuffix else verboseString + if (addSuffix) verboseStringWithSuffix(maxFields) else verboseString(maxFields) } else { - simpleString + simpleString(maxFields) } writer.write(prefix) writer.write(str) @@ -571,17 +583,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { if (innerChildren.nonEmpty) { innerChildren.init.foreach(_.generateTreeString( depth + 2, lastChildren :+ children.isEmpty :+ false, writer, verbose, - addSuffix = addSuffix)) + addSuffix = addSuffix, maxFields = maxFields)) innerChildren.last.generateTreeString( depth + 2, lastChildren :+ children.isEmpty :+ true, writer, verbose, - addSuffix = addSuffix) + addSuffix = addSuffix, maxFields = maxFields) } if (children.nonEmpty) { children.init.foreach(_.generateTreeString( - depth + 1, lastChildren :+ false, writer, verbose, prefix, addSuffix)) + 
depth + 1, lastChildren :+ false, writer, verbose, prefix, addSuffix, maxFields)) children.last.generateTreeString( - depth + 1, lastChildren :+ true, writer, verbose, prefix, addSuffix) + depth + 1, lastChildren :+ true, writer, verbose, prefix, addSuffix, maxFields) } } @@ -664,7 +676,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { t.forall(_.isInstanceOf[Partitioning]) || t.forall(_.isInstanceOf[DataType]) => JArray(t.map(parseToJson).toList) case t: Seq[_] if t.length > 0 && t.head.isInstanceOf[String] => - JString(truncatedString(t, "[", ", ", "]")) + JString(truncatedString(t, "[", ", ", "]", SQLConf.get.maxToStringFields)) case t: Seq[_] => JNull case m: Map[_, _] => JNull // if it's a scala object, we can simply keep the full class path. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index 277584b20dcd..7f5860e12cfd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -184,14 +184,14 @@ package object util extends Logging { start: String, sep: String, end: String, - maxNumFields: Int = SQLConf.get.maxToStringFields): String = { - if (seq.length > maxNumFields) { + maxFields: Int): String = { + if (seq.length > maxFields) { if (truncationWarningPrinted.compareAndSet(false, true)) { logWarning( "Truncated the string representation of a plan since it was too large. This " + s"behavior can be adjusted by setting '${SQLConf.MAX_TO_STRING_FIELDS.key}'.") } - val numFields = math.max(0, maxNumFields - 1) + val numFields = math.max(0, maxFields - 1) seq.take(numFields).mkString( start, sep, sep + "... " + (seq.length - numFields) + " more fields" + end) } else { @@ -200,7 +200,9 @@ package object util extends Logging { } /** Shorthand for calling truncatedString() without start or end strings. 
*/ - def truncatedString[T](seq: Seq[T], sep: String): String = truncatedString(seq, "", sep, "") + def truncatedString[T](seq: Seq[T], sep: String, maxFields: Int): String = { + truncatedString(seq, "", sep, "", maxFields) + } /* FIX ME implicit class debugLogging(a: Any) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index e01d7c59cac5..d563276a5711 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -28,6 +28,7 @@ import org.apache.spark.annotation.Stable import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, LegacyTypeStringParser} import org.apache.spark.sql.catalyst.util.{quoteIdentifier, truncatedString} +import org.apache.spark.sql.internal.SQLConf /** * A [[StructType]] object can be constructed by @@ -343,7 +344,10 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru override def simpleString: String = { val fieldTypes = fields.view.map(field => s"${field.name}:${field.dataType.simpleString}") - truncatedString(fieldTypes, "struct<", ",", ">") + truncatedString( + fieldTypes, + "struct<", ",", ">", + SQLConf.get.maxToStringFields) } override def catalogString: String = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala index 63c7b4297802..0e0c8e167a0a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala @@ -215,7 +215,7 @@ class PercentileSuite extends SparkFunSuite { val percentile2 = new Percentile(child, percentage) assertEqual(percentile2.checkInputDataTypes(), TypeCheckFailure(s"Percentage(s) must be between 0.0 and 1.0, " + - s"but got ${percentage.simpleString}")) + s"but got ${percentage.simpleString(100)}")) } val nonFoldablePercentage = Seq(NonFoldableLiteral(0.5), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/UtilSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/UtilSuite.scala index 9c162026942f..d95de71e897a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/UtilSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/UtilSuite.scala @@ -26,6 +26,6 @@ class UtilSuite extends SparkFunSuite { assert(truncatedString(Seq(1, 2), "[", ", ", "]", 2) == "[1, 2]") assert(truncatedString(Seq(1, 2, 3), "[", ", ", "]", 2) == "[1, ... 2 more fields]") assert(truncatedString(Seq(1, 2, 3), "[", ", ", "]", -5) == "[, ... 
3 more fields]") - assert(truncatedString(Seq(1, 2, 3), ", ") == "1, 2, 3") + assert(truncatedString(Seq(1, 2, 3), ", ", 10) == "1, 2, 3") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 322ffffca564..1d7dd73706c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -52,19 +52,19 @@ trait DataSourceScanExec extends LeafExecNode with CodegenSupport { // Metadata that describes more details of this scan. protected def metadata: Map[String, String] - override def simpleString: String = { + override def simpleString(maxFields: Int): String = { val metadataEntries = metadata.toSeq.sorted.map { case (key, value) => key + ": " + StringUtils.abbreviate(redact(value), 100) } - val metadataStr = truncatedString(metadataEntries, " ", ", ", "") - s"$nodeNamePrefix$nodeName${truncatedString(output, "[", ",", "]")}$metadataStr" + val metadataStr = truncatedString(metadataEntries, " ", ", ", "", maxFields) + s"$nodeNamePrefix$nodeName${truncatedString(output, "[", ",", "]", maxFields)}$metadataStr" } - override def verboseString: String = redact(super.verboseString) + override def verboseString(maxFields: Int): String = redact(super.verboseString(maxFields)) - override def treeString(verbose: Boolean, addSuffix: Boolean): String = { - redact(super.treeString(verbose, addSuffix)) + override def treeString(verbose: Boolean, addSuffix: Boolean, maxFields: Int): String = { + redact(super.treeString(verbose, addSuffix, maxFields)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index 49fb288fdea6..981ecae80a72 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -79,7 +79,7 @@ case class ExternalRDDScanExec[T]( } } - override def simpleString: String = { + override def simpleString(maxFields: Int): String = { s"$nodeName${output.mkString("[", ",", "]")}" } } @@ -156,8 +156,8 @@ case class RDDScanExec( } } - override def simpleString: String = { - s"$nodeName${truncatedString(output, "[", ",", "]")}" + override def simpleString(maxFields: Int): String = { + s"$nodeName${truncatedString(output, "[", ",", "]", maxFields)}" } // Input can be InternalRow, has to be turned into UnsafeRows. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index eef5a3f899f5..9b8d2e830867 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.command.{DescribeTableCommand, ExecutedCommandExec, ShowTablesCommand} import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReuseExchange} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BinaryType, DateType, DecimalType, TimestampType, _} import org.apache.spark.util.Utils @@ -208,27 +209,27 @@ class QueryExecution( } } - private def writePlans(writer: Writer): Unit = { + private def writePlans(writer: Writer, maxFields: Int): Unit = { val (verbose, addSuffix) = (true, false) writer.write("== Parsed Logical Plan ==\n") - writeOrError(writer)(logical.treeString(_, verbose, addSuffix)) + writeOrError(writer)(logical.treeString(_, verbose, addSuffix, maxFields)) writer.write("\n== Analyzed Logical Plan ==\n") val analyzedOutput = stringOrError(truncatedString( - analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}"), ", ")) + analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}"), ", ", maxFields)) writer.write(analyzedOutput) writer.write("\n") - writeOrError(writer)(analyzed.treeString(_, verbose, addSuffix)) + writeOrError(writer)(analyzed.treeString(_, verbose, addSuffix, maxFields)) writer.write("\n== Optimized Logical Plan ==\n") - writeOrError(writer)(optimizedPlan.treeString(_, verbose, addSuffix)) + writeOrError(writer)(optimizedPlan.treeString(_, verbose, addSuffix, maxFields)) writer.write("\n== Physical Plan ==\n") - writeOrError(writer)(executedPlan.treeString(_, verbose, addSuffix)) + writeOrError(writer)(executedPlan.treeString(_, verbose, addSuffix, maxFields)) } override def toString: String = withRedaction { val writer = new StringBuilderWriter() try { - writePlans(writer) + writePlans(writer, SQLConf.get.maxToStringFields) writer.toString } finally { writer.close() @@ -280,14 +281,16 @@ class QueryExecution( /** * Dumps debug information about query execution into the specified file. + * + * @param maxFields maximim number of fields converted to string representation. 
*/ - def toFile(path: String): Unit = { + def toFile(path: String, maxFields: Int = Int.MaxValue): Unit = { val filePath = new Path(path) val fs = filePath.getFileSystem(sparkSession.sessionState.newHadoopConf()) val writer = new BufferedWriter(new OutputStreamWriter(fs.create(filePath))) try { - writePlans(writer) + writePlans(writer, maxFields) writer.write("\n== Whole Stage Codegen ==\n") org.apache.spark.sql.execution.debug.writeCodegen(writer, executedPlan) } finally { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala index 59ffd1638111..f554ff0aa775 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.spark.sql.execution.metric.SQLMetricInfo +import org.apache.spark.sql.internal.SQLConf /** * :: DeveloperApi :: @@ -62,7 +63,10 @@ private[execution] object SparkPlanInfo { case fileScan: FileSourceScanExec => fileScan.metadata case _ => Map[String, String]() } - new SparkPlanInfo(plan.nodeName, plan.simpleString, children.map(fromSparkPlan), + new SparkPlanInfo( + plan.nodeName, + plan.simpleString(SQLConf.get.maxToStringFields), + children.map(fromSparkPlan), metadata, metrics) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index fbda0d87a175..f4927dedabe5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -87,7 +87,7 @@ trait CodegenSupport extends SparkPlan { this.parent = parent ctx.freshNamePrefix = variablePrefix s""" - |${ctx.registerComment(s"PRODUCE: ${this.simpleString}")} + |${ctx.registerComment(s"PRODUCE: ${this.simpleString(SQLConf.get.maxToStringFields)}")} |${doProduce(ctx)} """.stripMargin } @@ -188,7 +188,7 @@ trait CodegenSupport extends SparkPlan { parent.doConsume(ctx, inputVars, rowVar) } s""" - |${ctx.registerComment(s"CONSUME: ${parent.simpleString}")} + |${ctx.registerComment(s"CONSUME: ${parent.simpleString(SQLConf.get.maxToStringFields)}")} |$evaluated |$consumeFunc """.stripMargin @@ -496,8 +496,16 @@ case class InputAdapter(child: SparkPlan) extends UnaryExecNode with InputRDDCod writer: Writer, verbose: Boolean, prefix: String = "", - addSuffix: Boolean = false): Unit = { - child.generateTreeString(depth, lastChildren, writer, verbose, prefix = "", addSuffix = false) + addSuffix: Boolean = false, + maxFields: Int): Unit = { + child.generateTreeString( + depth, + lastChildren, + writer, + verbose, + prefix = "", + addSuffix = false, + maxFields) } override def needCopyResult: Boolean = false @@ -772,8 +780,16 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) writer: Writer, verbose: Boolean, prefix: String = "", - addSuffix: Boolean = false): Unit = { - child.generateTreeString(depth, lastChildren, writer, verbose, s"*($codegenStageId) ", false) + addSuffix: Boolean = false, + maxFields: Int): Unit = { + child.generateTreeString( + depth, + lastChildren, + writer, + verbose, + s"*($codegenStageId) ", + false, + maxFields) } override def 
needStopCheck: Boolean = true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 4827f838fc51..2355d305c38e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -922,18 +922,18 @@ case class HashAggregateExec( """ } - override def verboseString: String = toString(verbose = true) + override def verboseString(maxFields: Int): String = toString(verbose = true, maxFields) - override def simpleString: String = toString(verbose = false) + override def simpleString(maxFields: Int): String = toString(verbose = false, maxFields) - private def toString(verbose: Boolean): String = { + private def toString(verbose: Boolean, maxFields: Int): String = { val allAggregateExpressions = aggregateExpressions testFallbackStartsAt match { case None => - val keyString = truncatedString(groupingExpressions, "[", ", ", "]") - val functionString = truncatedString(allAggregateExpressions, "[", ", ", "]") - val outputString = truncatedString(output, "[", ", ", "]") + val keyString = truncatedString(groupingExpressions, "[", ", ", "]", maxFields) + val functionString = truncatedString(allAggregateExpressions, "[", ", ", "]", maxFields) + val outputString = truncatedString(output, "[", ", ", "]", maxFields) if (verbose) { s"HashAggregate(keys=$keyString, functions=$functionString, output=$outputString)" } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala index 7145bb03028d..bd52c6321647 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala @@ -137,15 +137,15 @@ case class ObjectHashAggregateExec( } } - override def verboseString: String = toString(verbose = true) + override def verboseString(maxFields: Int): String = toString(verbose = true, maxFields) - override def simpleString: String = toString(verbose = false) + override def simpleString(maxFields: Int): String = toString(verbose = false, maxFields) - private def toString(verbose: Boolean): String = { + private def toString(verbose: Boolean, maxFields: Int): String = { val allAggregateExpressions = aggregateExpressions - val keyString = truncatedString(groupingExpressions, "[", ", ", "]") - val functionString = truncatedString(allAggregateExpressions, "[", ", ", "]") - val outputString = truncatedString(output, "[", ", ", "]") + val keyString = truncatedString(groupingExpressions, "[", ", ", "]", maxFields) + val functionString = truncatedString(allAggregateExpressions, "[", ", ", "]", maxFields) + val outputString = truncatedString(output, "[", ", ", "]", maxFields) if (verbose) { s"ObjectHashAggregate(keys=$keyString, functions=$functionString, output=$outputString)" } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala index d732b905dcdd..7ab6ecc08a7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala @@ -107,16 +107,16 @@ case class SortAggregateExec( } } - override def simpleString: String = toString(verbose = false) + override def simpleString(maxFields: Int): String = toString(verbose = false, maxFields) - override def verboseString: String = toString(verbose = true) + override def verboseString(maxFields: Int): String = toString(verbose = true, maxFields) - private def toString(verbose: Boolean): String = { + private def toString(verbose: Boolean, maxFields: Int): String = { val allAggregateExpressions = aggregateExpressions - val keyString = truncatedString(groupingExpressions, "[", ", ", "]") - val functionString = truncatedString(allAggregateExpressions, "[", ", ", "]") - val outputString = truncatedString(output, "[", ", ", "]") + val keyString = truncatedString(groupingExpressions, "[", ", ", "]", maxFields) + val functionString = truncatedString(allAggregateExpressions, "[", ", ", "]", maxFields) + val outputString = truncatedString(output, "[", ", ", "]", maxFields) if (verbose) { s"SortAggregate(key=$keyString, functions=$functionString, output=$outputString)" } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 09effe087e19..2570b36b3166 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -586,7 +586,9 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) } } - override def simpleString: String = s"Range ($start, $end, step=$step, splits=$numSlices)" + override def simpleString(maxFields: Int): String = { + s"Range ($start, $end, step=$step, splits=$numSlices)" + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index 73eb65f84489..4109d9994dd8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -209,6 +209,6 @@ case class InMemoryRelation( override protected def otherCopyArgs: Seq[AnyRef] = Seq(statsOfPlanToCache) - override def simpleString: String = - s"InMemoryRelation [${truncatedString(output, ", ")}], ${cacheBuilder.storageLevel}" + override def simpleString(maxFields: Int): String = + s"InMemoryRelation [${truncatedString(output, ", ", maxFields)}], ${cacheBuilder.storageLevel}" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala index 1023572d19e2..db3604fe92cc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala @@ -63,7 +63,9 @@ case class LogicalRelation( case _ => // Do nothing. 
} - override def simpleString: String = s"Relation[${truncatedString(output, ",")}] $relation" + override def simpleString(maxFields: Int): String = { + s"Relation[${truncatedString(output, ",", maxFields)}] $relation" + } } object LogicalRelation { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala index 00b1b5dedb59..f29e7869fb27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala @@ -48,7 +48,7 @@ case class SaveIntoDataSourceCommand( Seq.empty[Row] } - override def simpleString: String = { + override def simpleString(maxFields: Int): String = { val redacted = SQLConf.get.redactOptions(options) s"SaveIntoDataSourceCommand ${dataSource}, ${redacted}, ${mode}" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala index fdc5e85f3c2e..042320edea4f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala @@ -68,7 +68,7 @@ case class CreateTempViewUsing( s"Temporary view '$tableIdent' should not have specified a database") } - override def argString: String = { + override def argString(maxFields: Int): String = { s"[tableIdent:$tableIdent " + userSpecifiedSchema.map(_ + " ").getOrElse("") + s"replace:$replace " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala index 0a6b0afe6cfe..7bf2b8bff373 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -52,8 +52,8 @@ case class DataSourceV2Relation( override def name: String = table.name() - override def simpleString: String = { - s"RelationV2${truncatedString(output, "[", ", ", "]")} $name" + override def simpleString(maxFields: Int): String = { + s"RelationV2${truncatedString(output, "[", ", ", "]", maxFields)} $name" } def newWriteSupport(): BatchWriteSupport = source.createWriteSupport(options, schema) @@ -96,7 +96,9 @@ case class StreamingDataSourceV2Relation( override def isStreaming: Boolean = true - override def simpleString: String = "Streaming RelationV2 " + metadataString + override def simpleString(maxFields: Int): String = { + "Streaming RelationV2 " + metadataString(maxFields) + } override def pushedFilters: Seq[Expression] = Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala index 725bcc3af3ca..53e4e77c65e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala @@ -35,8 +35,8 @@ case class DataSourceV2ScanExec( @transient batch: Batch) extends LeafExecNode with ColumnarBatchScan { - override def simpleString: String = { - s"ScanV2${truncatedString(output, 
"[", ", ", "]")} $scanDesc" + override def simpleString(maxFields: Int): String = { + s"ScanV2${truncatedString(output, "[", ", ", "]", maxFields)} $scanDesc" } // TODO: unify the equal/hashCode implementation for all data source v2 query plans. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StreamingScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StreamingScanExec.scala index c87294090996..be75fe4f596d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StreamingScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StreamingScanExec.scala @@ -42,7 +42,7 @@ case class DataSourceV2StreamingScanExec( @transient scanConfig: ScanConfig) extends LeafExecNode with DataSourceV2StringFormat with ColumnarBatchScan { - override def simpleString: String = "ScanV2 " + metadataString + override def simpleString(maxFields: Int): String = "ScanV2 " + metadataString(maxFields) // TODO: unify the equal/hashCode implementation for all data source v2 query plans. override def equals(other: Any): Boolean = other match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StringFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StringFormat.scala index e829f621b4ea..f11703c8a277 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StringFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StringFormat.scala @@ -59,7 +59,7 @@ trait DataSourceV2StringFormat { case _ => Utils.getSimpleName(source.getClass) } - def metadataString: String = { + def metadataString(maxFields: Int): String = { val entries = scala.collection.mutable.ArrayBuffer.empty[(String, String)] if (pushedFilters.nonEmpty) { @@ -73,12 +73,12 @@ trait DataSourceV2StringFormat { }.mkString("[", ",", "]") } - val outputStr = truncatedString(output, "[", ", ", "]") + val outputStr = truncatedString(output, "[", ", ", "]", maxFields) val entriesStr = if (entries.nonEmpty) { truncatedString(entries.map { case (key, value) => key + ": " + StringUtils.abbreviate(value, 100) - }, " (", ", ", ")") + }, " (", ", ", ")", maxFields) } else { "" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala index 3511cefa7c29..ae8197f617a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, Codegen import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.trees.TreeNodeRef import org.apache.spark.sql.execution.streaming.{StreamExecution, StreamingQueryWrapper} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.StreamingQuery import org.apache.spark.util.{AccumulatorV2, LongAccumulator} @@ -216,7 +217,7 @@ package object debug { val columnStats: Array[ColumnMetrics] = Array.fill(child.output.size)(new ColumnMetrics()) def dumpStats(): Unit = { - debugPrint(s"== ${child.simpleString} ==") + debugPrint(s"== ${child.simpleString(SQLConf.get.maxToStringFields)} ==") debugPrint(s"Tuples output: ${tupleCount.value}") 
child.output.zip(columnStats).foreach { case (attr, metric) => // This is called on driver. All accumulator updates have a fixed value. So it's safe to use diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index bfaf080292bc..56973af8fd64 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -198,9 +198,9 @@ case class TakeOrderedAndProjectExec( override def outputPartitioning: Partitioning = SinglePartition - override def simpleString: String = { - val orderByString = truncatedString(sortOrder, "[", ",", "]") - val outputString = truncatedString(output, "[", ",", "]") + override def simpleString(maxFields: Int): String = { + val orderByString = truncatedString(sortOrder, "[", ",", "]", maxFields) + val outputString = truncatedString(output, "[", ",", "]", maxFields) s"TakeOrderedAndProject(limit=$limit, orderBy=$orderByString, output=$outputString)" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 8ad436a4ff57..38ecb0dd12da 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.datasources.v2.{StreamingDataSourceV2Relation, StreamWriterCommitProgress, WriteToDataSourceV2, WriteToDataSourceV2Exec} import org.apache.spark.sql.execution.streaming.sources.{MicroBatchWritSupport, RateControlMicroBatchReadSupport} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.v2._ import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchReadSupport, Offset => OffsetV2} import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime, Trigger} @@ -482,9 +483,10 @@ class MicroBatchExecution( val newBatchesPlan = logicalPlan transform { case StreamingExecutionRelation(source, output) => newData.get(source).map { dataPlan => + val maxFields = SQLConf.get.maxToStringFields assert(output.size == dataPlan.output.size, - s"Invalid batch: ${truncatedString(output, ",")} != " + - s"${truncatedString(dataPlan.output, ",")}") + s"Invalid batch: ${truncatedString(output, ",", maxFields)} != " + + s"${truncatedString(dataPlan.output, ",", maxFields)}") val aliases = output.zip(dataPlan.output).map { case (to, from) => Alias(from, to.name)(exprId = to.exprId, explicitMetadata = Some(from.metadata)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index f0859aaaa304..89033b70f143 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2StreamingScanExec, StreamingDataSourceV2Relation} import 
org.apache.spark.sql.execution.streaming.{ContinuousExecutionRelation, StreamingRelationV2, _} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.v2 import org.apache.spark.sql.sources.v2.{ContinuousReadSupportProvider, DataSourceOptions, StreamingWriteSupportProvider} import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousReadSupport, PartitionOffset} @@ -166,10 +167,10 @@ class ContinuousExecution( val readSupport = continuousSources(insertedSourceId) insertedSourceId += 1 val newOutput = readSupport.fullSchema().toAttributes - + val maxFields = SQLConf.get.maxToStringFields assert(output.size == newOutput.size, - s"Invalid reader: ${truncatedString(output, ",")} != " + - s"${truncatedString(newOutput, ",")}") + s"Invalid reader: ${truncatedString(output, ",", maxFields)} != " + + s"${truncatedString(newOutput, ",", maxFields)}") replacements ++= output.zip(newOutput) val loggedOffset = offsets.offsets(0) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala index daee089f3871..13b75ae4a433 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Stati import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.v2.reader._ import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchReadSupport, Offset => OffsetV2} import org.apache.spark.sql.streaming.OutputMode @@ -117,7 +118,9 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext) } } - override def toString: String = s"MemoryStream[${truncatedString(output, ",")}]" + override def toString: String = { + s"MemoryStream[${truncatedString(output, ",", SQLConf.get.maxToStringFields)}]" + } override def deserializeOffset(json: String): OffsetV2 = LongOffset(json.toLong) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 310ebcdf6768..e180d2228c3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -51,7 +51,7 @@ case class ScalarSubquery( override def dataType: DataType = plan.schema.fields.head.dataType override def children: Seq[Expression] = Nil override def nullable: Boolean = true - override def toString: String = plan.simpleString + override def toString: String = plan.simpleString(SQLConf.get.maxToStringFields) override def withNewPlan(query: SubqueryExec): ScalarSubquery = copy(plan = query) override def semanticEquals(other: Expression): Boolean = other match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala index 0c47a2040f17..3cc97c995702 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala @@ -106,6 +106,19 @@ class QueryExecutionSuite extends 
SharedSQLContext { } } + test("check maximum fields restriction") { + withTempDir { dir => + val path = dir.getCanonicalPath + "/plans.txt" + val ds = spark.createDataset(Seq(QueryExecutionTestRecord( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26))) + ds.queryExecution.debug.toFile(path) + val localRelations = Source.fromFile(path).getLines().filter(_.contains("LocalRelation")) + + assert(!localRelations.exists(_.contains("more fields"))) + } + } + test("toString() exception/error handling") { spark.experimental.extraStrategies = Seq( new SparkStrategy { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala index 608f21e72625..7249eacfbf9a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala @@ -83,7 +83,7 @@ trait CreateHiveTableAsSelectBase extends DataWritingCommand { tableDesc: CatalogTable, tableExists: Boolean): DataWritingCommand - override def argString: String = { + override def argString(maxFields: Int): String = { s"[Database:${tableDesc.database}, " + s"TableName: ${tableDesc.identifier.table}, " + s"InsertIntoHiveTable]" From add287f397d41c1725464dff89d4a555ffc9db04 Mon Sep 17 00:00:00 2001 From: Kevin Yu Date: Thu, 27 Dec 2018 22:26:37 +0800 Subject: [PATCH 0193/1072] [SPARK-25892][SQL] Change AttributeReference.withMetadata's return type to AttributeReference ## What changes were proposed in this pull request? Currently the `AttributeReference.withMetadata` method have return type `Attribute`, the rest of with methods in the `AttributeReference` return type are `AttributeReference`, as the [spark-25892](https://issues.apache.org/jira/browse/SPARK-25892?jql=project%20%3D%20SPARK%20AND%20component%20in%20(ML%2C%20PySpark%2C%20SQL)) mentioned. This PR will change `AttributeReference.withMetadata` method's return type from `Attribute` to `AttributeReference`. ## How was this patch tested? Run all `sql/test,` `catalyst/test` and `org.apache.spark.sql.execution.streaming.*` Closes #22918 from kevinyu98/spark-25892. Authored-by: Kevin Yu Signed-off-by: Hyukjin Kwon --- .../spark/sql/catalyst/expressions/namedExpressions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 131459bf27bc..7ebb171f34ba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -311,7 +311,7 @@ case class AttributeReference( } } - override def withMetadata(newMetadata: Metadata): Attribute = { + override def withMetadata(newMetadata: Metadata): AttributeReference = { AttributeReference(name, dataType, nullable, newMetadata)(exprId, qualifier) } From 68496c1af310aadfb1b226cb05be510252769d43 Mon Sep 17 00:00:00 2001 From: deepyaman Date: Fri, 28 Dec 2018 00:02:41 +0800 Subject: [PATCH 0194/1072] [SPARK-26451][SQL] Change lead/lag argument name from count to offset ## What changes were proposed in this pull request? Change aligns argument name with that in Scala version and documentation. 
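
For reference, a minimal sketch of the Scala-side API that the Python parameter name is being aligned with; the data, window spec, and column names below are invented purely for illustration:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{lag, lead}

object LagLeadOffsetExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("lag-lead-offset").getOrCreate()
    import spark.implicits._

    val df = Seq(("a", 1), ("a", 2), ("a", 3), ("b", 10)).toDF("key", "value")
    val w = Window.partitionBy($"key").orderBy($"value")

    // The second argument is the row offset (not a count), matching the renamed Python parameter.
    df.select(
        $"key", $"value",
        lag($"value", 1, 0).over(w).as("prev_value"),
        lead($"value", 1, 0).over(w).as("next_value"))
      .show()

    spark.stop()
  }
}
```
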
## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23357 from deepyaman/patch-1. Authored-by: deepyaman Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/functions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index d2a771e9bb8e..3c33e2bed92d 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -798,7 +798,7 @@ def factorial(col): # --------------- Window functions ------------------------ @since(1.4) -def lag(col, count=1, default=None): +def lag(col, offset=1, default=None): """ Window function: returns the value that is `offset` rows before the current row, and `defaultValue` if there is less than `offset` rows before the current row. For example, @@ -807,15 +807,15 @@ def lag(col, count=1, default=None): This is equivalent to the LAG function in SQL. :param col: name of column or expression - :param count: number of row to extend + :param offset: number of row to extend :param default: default value """ sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.lag(_to_java_column(col), count, default)) + return Column(sc._jvm.functions.lag(_to_java_column(col), offset, default)) @since(1.4) -def lead(col, count=1, default=None): +def lead(col, offset=1, default=None): """ Window function: returns the value that is `offset` rows after the current row, and `defaultValue` if there is less than `offset` rows after the current row. For example, @@ -824,11 +824,11 @@ def lead(col, count=1, default=None): This is equivalent to the LEAD function in SQL. :param col: name of column or expression - :param count: number of row to extend + :param offset: number of row to extend :param default: default value """ sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.lead(_to_java_column(col), count, default)) + return Column(sc._jvm.functions.lead(_to_java_column(col), offset, default)) @since(1.4) From f2adb610680f9addec51bf470acf64f3849073e9 Mon Sep 17 00:00:00 2001 From: wuqingxin Date: Fri, 28 Dec 2018 00:15:57 -0800 Subject: [PATCH 0195/1072] [SPARK-26446][CORE] Add cachedExecutorIdleTimeout docs at ExecutorAllocationManager ## What changes were proposed in this pull request? Add docs to describe how remove policy act while considering the property `spark.dynamicAllocation.cachedExecutorIdleTimeout` in ExecutorAllocationManager ## How was this patch tested? comment-only PR. Closes #23386 from TopGunViper/SPARK-26446. Authored-by: wuqingxin Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/ExecutorAllocationManager.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index c3e5b96a5588..3f0b71bbe17f 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -57,7 +57,8 @@ import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils} * a long time to ramp up under heavy workloads. 
* * The remove policy is simpler: If an executor has been idle for K seconds, meaning it has not - * been scheduled to run any tasks, then it is removed. + * been scheduled to run any tasks, then it is removed. Note that an executor caching any data + * blocks will be removed if it has been idle for more than L seconds. * * There is no retry logic in either case because we make the assumption that the cluster manager * will eventually fulfill all requests it receives asynchronously. @@ -81,7 +82,12 @@ import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils} * This is used only after the initial backlog timeout is exceeded * * spark.dynamicAllocation.executorIdleTimeout (K) - - * If an executor has been idle for this duration, remove it + * If an executor without caching any data blocks has been idle for this duration, remove it + * + * spark.dynamicAllocation.cachedExecutorIdleTimeout (L) - + * If an executor with caching data blocks has been idle for more than this duration, + * the executor will be removed + * */ private[spark] class ExecutorAllocationManager( client: ExecutorAllocationClient, From 5bef4fedfe1916320223b1245bacb58f151cee66 Mon Sep 17 00:00:00 2001 From: seancxmao Date: Fri, 28 Dec 2018 07:40:59 -0600 Subject: [PATCH 0196/1072] [SPARK-26444][WEBUI] Stage color doesn't change with it's status ## What changes were proposed in this pull request? On job page, in event timeline section, stage color doesn't change according to its status. Below are some screenshots. ACTIVE: active COMPLETE: complete FAILED: failed This PR lets stage color change with it's status. The main idea is to make css style class name match the corresponding stage status. ## How was this patch tested? Manually tested locally. ``` // active/complete stage sc.parallelize(1 to 3, 3).map { n => Thread.sleep(10* 1000); n }.count // failed stage sc.parallelize(1 to 3, 3).map { n => Thread.sleep(10* 1000); throw new Exception() }.count ``` Note we need to clear browser cache to let new `timeline-view.css` take effect. Below are screenshots after this PR. ACTIVE: active-after COMPLETE: complete-after FAILED: failed-after Closes #23385 from seancxmao/timeline-stage-color. 
Authored-by: seancxmao Signed-off-by: Sean Owen --- .../org/apache/spark/ui/static/timeline-view.css | 8 ++++---- .../src/main/scala/org/apache/spark/ui/jobs/JobPage.scala | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css index 3bf3e8bfa1f3..10bceae2fbdd 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css @@ -98,12 +98,12 @@ rect.getting-result-time-proportion { cursor: pointer; } -.vis-timeline .vis-item.stage.succeeded { +.vis-timeline .vis-item.stage.complete { background-color: #A0DFFF; border-color: #3EC0FF; } -.vis-timeline .vis-item.stage.succeeded.vis-selected { +.vis-timeline .vis-item.stage.complete.vis-selected { background-color: #A0DFFF; border-color: #3EC0FF; z-index: auto; @@ -130,12 +130,12 @@ rect.getting-result-time-proportion { stroke: #FF4D6D; } -.vis-timeline .vis-item.stage.running { +.vis-timeline .vis-item.stage.active { background-color: #A2FCC0; border-color: #36F572; } -.vis-timeline .vis-item.stage.running.vis-selected { +.vis-timeline .vis-item.stage.active.vis-selected { background-color: #A2FCC0; border-color: #36F572; z-index: auto; diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index b58a6ca447ed..cd82439223b0 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -62,7 +62,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP val stageId = stage.stageId val attemptId = stage.attemptId val name = stage.name - val status = stage.status.toString + val status = stage.status.toString.toLowerCase(Locale.ROOT) val submissionTime = stage.submissionTime.get.getTime() val completionTime = stage.completionTime.map(_.getTime()) .getOrElse(System.currentTimeMillis()) From e0054b88a1624ec5196dc206997db065731099ac Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 28 Dec 2018 11:29:06 -0800 Subject: [PATCH 0197/1072] [SPARK-26424][SQL][FOLLOWUP] Fix DateFormatClass/UnixTime codegen ## What changes were proposed in this pull request? This PR fixes the codegen bug introduced by #23358 . - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/job/spark-master-test-maven-hadoop-2.7-ubuntu-scala-2.11/158/ ``` Line 44, Column 93: A method named "apply" is not declared in any enclosing class nor any supertype, nor through a static import ``` ## How was this patch tested? Manual. `DateExpressionsSuite` should be passed with Scala-2.11. Closes #23394 from dongjoon-hyun/SPARK-26424. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/expressions/datetimeExpressions.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 73af0a3c5c2e..8fc0112c0257 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -571,7 +571,7 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti val tz = ctx.addReferenceObj("timeZone", timeZone) val locale = ctx.addReferenceObj("locale", Locale.US) defineCodeGen(ctx, ev, (timestamp, format) => { - s"""UTF8String.fromString($tf.apply($format.toString(), $tz, $locale) + s"""UTF8String.fromString($tf$$.MODULE$$.apply($format.toString(), $tz, $locale) .format($timestamp))""" }) } @@ -741,11 +741,11 @@ abstract class UnixTime case StringType => val tz = ctx.addReferenceObj("timeZone", timeZone) val locale = ctx.addReferenceObj("locale", Locale.US) - val dtu = TimestampFormatter.getClass.getName.stripSuffix("$") + val tf = TimestampFormatter.getClass.getName.stripSuffix("$") nullSafeCodeGen(ctx, ev, (string, format) => { s""" try { - ${ev.value} = $dtu.apply($format.toString(), $tz, $locale) + ${ev.value} = $tf$$.MODULE$$.apply($format.toString(), $tz, $locale) .parse($string.toString()) / 1000000L; } catch (java.lang.IllegalArgumentException e) { ${ev.isNull} = true; From e63243df8aca9f44255879e931e0c372beef9fc2 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Sat, 29 Dec 2018 12:11:45 -0800 Subject: [PATCH 0198/1072] [SPARK-26496][SS][TEST] Avoid to use Random.nextString in StreamingInnerJoinSuite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? Similar with https://github.com/apache/spark/pull/21446. Looks random string is not quite safe as a directory name. ```scala scala> val prefix = Random.nextString(10); val dir = new File("/tmp", "del_" + prefix + "-" + UUID.randomUUID.toString); dir.mkdirs() prefix: String = 窽텘⒘駖ⵚ駢⡞Ρ닋੎ dir: java.io.File = /tmp/del_窽텘⒘駖ⵚ駢⡞Ρ닋੎-a3f99855-c429-47a0-a108-47bca6905745 res40: Boolean = false // nope, didn't like this one ``` ## How was this patch tested? Unit test was added, and manually. Closes #23405 from HyukjinKwon/SPARK-26496. 
Authored-by: Hyukjin Kwon Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/streaming/StreamingJoinSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index c5cc8df4356a..42fe9f34ee3e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -350,7 +350,7 @@ class StreamingInnerJoinSuite extends StreamTest with StateStoreMetricsTest with withTempDir { tempDir => val queryId = UUID.randomUUID val opId = 0 - val path = Utils.createDirectory(tempDir.getAbsolutePath, Random.nextString(10)).toString + val path = Utils.createDirectory(tempDir.getAbsolutePath, Random.nextFloat.toString).toString val stateInfo = StatefulOperatorStateInfo(path, queryId, opId, 0L, 5) implicit val sqlContext = spark.sqlContext From e6d3e7d0d8c80adaa51b43d76f1cc83bb9a010b9 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sat, 29 Dec 2018 17:33:43 -0800 Subject: [PATCH 0199/1072] [SPARK-26443][CORE] Use ConfigEntry for hardcoded configs for history category. ## What changes were proposed in this pull request? This pr makes hardcoded "spark.history" configs to use `ConfigEntry` and put them in `History` config object. ## How was this patch tested? Existing tests. Closes #23384 from ueshin/issues/SPARK-26443/hardcoded_history_configs. Authored-by: Takuya UESHIN Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/SparkConf.scala | 4 +- .../deploy/history/FsHistoryProvider.scala | 21 ++++----- .../spark/deploy/history/HistoryServer.scala | 16 ++++--- .../history/HistoryServerArguments.scala | 2 +- .../spark/internal/config/History.scala | 46 ++++++++++++++++++- .../org/apache/spark/SparkConfSuite.scala | 2 +- .../history/FsHistoryProviderSuite.scala | 23 +++++----- .../history/HistoryServerArgumentsSuite.scala | 9 ++-- .../deploy/history/HistoryServerSuite.scala | 13 +++--- 9 files changed, 89 insertions(+), 47 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 8d135d3e083d..0b47da12b5b4 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -682,11 +682,11 @@ private[spark] object SparkConf extends Logging { private val configsWithAlternatives = Map[String, Seq[AlternateConfig]]( "spark.executor.userClassPathFirst" -> Seq( AlternateConfig("spark.files.userClassPathFirst", "1.3")), - "spark.history.fs.update.interval" -> Seq( + UPDATE_INTERVAL_S.key -> Seq( AlternateConfig("spark.history.fs.update.interval.seconds", "1.4"), AlternateConfig("spark.history.fs.updateInterval", "1.3"), AlternateConfig("spark.history.updateInterval", "1.3")), - "spark.history.fs.cleaner.interval" -> Seq( + CLEANER_INTERVAL_S.key -> Seq( AlternateConfig("spark.history.fs.cleaner.interval.seconds", "1.4")), MAX_LOG_AGE_S.key -> Seq( AlternateConfig("spark.history.fs.cleaner.maxAge.seconds", "1.4")), diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index da6e5f03aabb..709a380dfb63 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala 
@@ -42,7 +42,7 @@ import org.fusesource.leveldbjni.internal.NativeDB import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.DRIVER_LOG_DFS_DIR +import org.apache.spark.internal.config.{DRIVER_LOG_DFS_DIR, History} import org.apache.spark.internal.config.History._ import org.apache.spark.internal.config.Status._ import org.apache.spark.io.CompressionCodec @@ -91,24 +91,22 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) import FsHistoryProvider._ // Interval between safemode checks. - private val SAFEMODE_CHECK_INTERVAL_S = conf.getTimeAsSeconds( - "spark.history.fs.safemodeCheck.interval", "5s") + private val SAFEMODE_CHECK_INTERVAL_S = conf.get(History.SAFEMODE_CHECK_INTERVAL_S) // Interval between each check for event log updates - private val UPDATE_INTERVAL_S = conf.getTimeAsSeconds("spark.history.fs.update.interval", "10s") + private val UPDATE_INTERVAL_S = conf.get(History.UPDATE_INTERVAL_S) // Interval between each cleaner checks for event logs to delete - private val CLEAN_INTERVAL_S = conf.get(CLEANER_INTERVAL_S) + private val CLEAN_INTERVAL_S = conf.get(History.CLEANER_INTERVAL_S) // Number of threads used to replay event logs. - private val NUM_PROCESSING_THREADS = conf.getInt(SPARK_HISTORY_FS_NUM_REPLAY_THREADS, - Math.ceil(Runtime.getRuntime.availableProcessors() / 4f).toInt) + private val NUM_PROCESSING_THREADS = conf.get(History.NUM_REPLAY_THREADS) - private val logDir = conf.get(EVENT_LOG_DIR) + private val logDir = conf.get(History.HISTORY_LOG_DIR) - private val HISTORY_UI_ACLS_ENABLE = conf.getBoolean("spark.history.ui.acls.enable", false) - private val HISTORY_UI_ADMIN_ACLS = conf.get("spark.history.ui.admin.acls", "") - private val HISTORY_UI_ADMIN_ACLS_GROUPS = conf.get("spark.history.ui.admin.acls.groups", "") + private val HISTORY_UI_ACLS_ENABLE = conf.get(History.UI_ACLS_ENABLE) + private val HISTORY_UI_ADMIN_ACLS = conf.get(History.UI_ADMIN_ACLS) + private val HISTORY_UI_ADMIN_ACLS_GROUPS = conf.get(History.UI_ADMIN_ACLS_GROUPS) logInfo(s"History server ui acls " + (if (HISTORY_UI_ACLS_ENABLE) "enabled" else "disabled") + "; users with admin permissions: " + HISTORY_UI_ADMIN_ACLS.toString + "; groups with admin permissions" + HISTORY_UI_ADMIN_ACLS_GROUPS.toString) @@ -1089,7 +1087,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } private[history] object FsHistoryProvider { - private val SPARK_HISTORY_FS_NUM_REPLAY_THREADS = "spark.history.fs.numReplayThreads" private val APPL_START_EVENT_PREFIX = "{\"Event\":\"SparkListenerApplicationStart\"" diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 5856c7057b74..b9303388638f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -30,7 +30,7 @@ import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ -import org.apache.spark.internal.config.History.HISTORY_SERVER_UI_PORT +import org.apache.spark.internal.config.History import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationInfo, UIRoot} import org.apache.spark.ui.{SparkUI, UIUtils, WebUI} import 
org.apache.spark.ui.JettyUtils._ @@ -56,7 +56,7 @@ class HistoryServer( with Logging with UIRoot with ApplicationCacheOperations { // How many applications to retain - private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50) + private val retainedApplications = conf.get(History.RETAINED_APPLICATIONS) // How many applications the summary ui displays private[history] val maxApplications = conf.get(HISTORY_UI_MAX_APPS); @@ -273,14 +273,14 @@ object HistoryServer extends Logging { initSecurity() val securityManager = createSecurityManager(conf) - val providerName = conf.getOption("spark.history.provider") + val providerName = conf.get(History.PROVIDER) .getOrElse(classOf[FsHistoryProvider].getName()) val provider = Utils.classForName(providerName) .getConstructor(classOf[SparkConf]) .newInstance(conf) .asInstanceOf[ApplicationHistoryProvider] - val port = conf.get(HISTORY_SERVER_UI_PORT) + val port = conf.get(History.HISTORY_SERVER_UI_PORT) val server = new HistoryServer(conf, provider, securityManager, port) server.bind() @@ -319,10 +319,12 @@ object HistoryServer extends Logging { // from a keytab file so that we can access HDFS beyond the kerberos ticket expiration. // As long as it is using Hadoop rpc (hdfs://), a relogin will automatically // occur from the keytab. - if (conf.getBoolean("spark.history.kerberos.enabled", false)) { + if (conf.get(History.KERBEROS_ENABLED)) { // if you have enabled kerberos the following 2 params must be set - val principalName = conf.get("spark.history.kerberos.principal") - val keytabFilename = conf.get("spark.history.kerberos.keytab") + val principalName = conf.get(History.KERBEROS_PRINCIPAL) + .getOrElse(throw new NoSuchElementException(History.KERBEROS_PRINCIPAL.key)) + val keytabFilename = conf.get(History.KERBEROS_KEYTAB) + .getOrElse(throw new NoSuchElementException(History.KERBEROS_KEYTAB.key)) SparkHadoopUtil.get.loginUserFromKeytab(principalName, keytabFilename) } } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala index 49f00cb10179..dec89769c030 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala @@ -79,7 +79,7 @@ private[history] class HistoryServerArguments(conf: SparkConf, args: Array[Strin | | spark.history.fs.logDirectory Directory where app logs are stored | (default: file:/tmp/spark-events) - | spark.history.fs.updateInterval How often to reload log data from storage + | spark.history.fs.update.interval How often to reload log data from storage | (in seconds, default: 10) |""".stripMargin) // scalastyle:on println diff --git a/core/src/main/scala/org/apache/spark/internal/config/History.scala b/core/src/main/scala/org/apache/spark/internal/config/History.scala index b7d8061d26d2..f984dd385344 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/History.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/History.scala @@ -25,10 +25,18 @@ private[spark] object History { val DEFAULT_LOG_DIR = "file:/tmp/spark-events" - val EVENT_LOG_DIR = ConfigBuilder("spark.history.fs.logDirectory") + val HISTORY_LOG_DIR = ConfigBuilder("spark.history.fs.logDirectory") .stringConf .createWithDefault(DEFAULT_LOG_DIR) + val SAFEMODE_CHECK_INTERVAL_S = ConfigBuilder("spark.history.fs.safemodeCheck.interval") + .timeConf(TimeUnit.SECONDS) + 
.createWithDefaultString("5s") + + val UPDATE_INTERVAL_S = ConfigBuilder("spark.history.fs.update.interval") + .timeConf(TimeUnit.SECONDS) + .createWithDefaultString("10s") + val CLEANER_ENABLED = ConfigBuilder("spark.history.fs.cleaner.enabled") .booleanConf .createWithDefault(false) @@ -79,4 +87,40 @@ private[spark] object History { val MAX_DRIVER_LOG_AGE_S = ConfigBuilder("spark.history.fs.driverlog.cleaner.maxAge") .fallbackConf(MAX_LOG_AGE_S) + + val UI_ACLS_ENABLE = ConfigBuilder("spark.history.ui.acls.enable") + .booleanConf + .createWithDefault(false) + + val UI_ADMIN_ACLS = ConfigBuilder("spark.history.ui.admin.acls") + .stringConf + .createWithDefault("") + + val UI_ADMIN_ACLS_GROUPS = ConfigBuilder("spark.history.ui.admin.acls.groups") + .stringConf + .createWithDefault("") + + val NUM_REPLAY_THREADS = ConfigBuilder("spark.history.fs.numReplayThreads") + .intConf + .createWithDefaultFunction(() => Math.ceil(Runtime.getRuntime.availableProcessors() / 4f).toInt) + + val RETAINED_APPLICATIONS = ConfigBuilder("spark.history.retainedApplications") + .intConf + .createWithDefault(50) + + val PROVIDER = ConfigBuilder("spark.history.provider") + .stringConf + .createOptional + + val KERBEROS_ENABLED = ConfigBuilder("spark.history.kerberos.enabled") + .booleanConf + .createWithDefault(false) + + val KERBEROS_PRINCIPAL = ConfigBuilder("spark.history.kerberos.principal") + .stringConf + .createOptional + + val KERBEROS_KEYTAB = ConfigBuilder("spark.history.kerberos.keytab") + .stringConf + .createOptional } diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala index 7cb03deae139..e14a5dcb5ef8 100644 --- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala @@ -232,7 +232,7 @@ class SparkConfSuite extends SparkFunSuite with LocalSparkContext with ResetSyst test("deprecated configs") { val conf = new SparkConf() - val newName = "spark.history.fs.update.interval" + val newName = UPDATE_INTERVAL_S.key assert(!conf.contains(newName)) diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index c1ae27aa940f..6d2e329094ae 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -294,7 +294,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc val maxAge = TimeUnit.SECONDS.toMillis(10) val clock = new ManualClock(maxAge / 2) val provider = new FsHistoryProvider( - createTestConf().set("spark.history.fs.cleaner.maxAge", s"${maxAge}ms"), clock) + createTestConf().set(MAX_LOG_AGE_S.key, s"${maxAge}ms"), clock) val log1 = newLogFile("app1", Some("attempt1"), inProgress = false) writeFile(log1, true, None, @@ -379,7 +379,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc val maxAge = TimeUnit.SECONDS.toMillis(40) val clock = new ManualClock(0) val provider = new FsHistoryProvider( - createTestConf().set("spark.history.fs.cleaner.maxAge", s"${maxAge}ms"), clock) + createTestConf().set(MAX_LOG_AGE_S.key, s"${maxAge}ms"), clock) val log1 = newLogFile("inProgressApp1", None, inProgress = true) writeFile(log1, true, None, @@ -462,8 +462,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc val maxAge = 
TimeUnit.SECONDS.toSeconds(40) val clock = new ManualClock(0) val testConf = new SparkConf() - testConf.set("spark.history.fs.logDirectory", - Utils.createTempDir(namePrefix = "eventLog").getAbsolutePath()) + testConf.set(HISTORY_LOG_DIR, Utils.createTempDir(namePrefix = "eventLog").getAbsolutePath()) testConf.set(DRIVER_LOG_DFS_DIR, testDir.getAbsolutePath()) testConf.set(DRIVER_LOG_CLEANER_ENABLED, true) testConf.set(DRIVER_LOG_CLEANER_INTERVAL, maxAge / 4) @@ -645,9 +644,9 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc // Test both history ui admin acls and application acls are configured. val conf1 = createTestConf() - .set("spark.history.ui.acls.enable", "true") - .set("spark.history.ui.admin.acls", "user1,user2") - .set("spark.history.ui.admin.acls.groups", "group1") + .set(UI_ACLS_ENABLE, true) + .set(UI_ADMIN_ACLS, "user1,user2") + .set(UI_ADMIN_ACLS_GROUPS, "group1") .set("spark.user.groups.mapping", classOf[TestGroupsMappingProvider].getName) createAndCheck(conf1, ("spark.admin.acls", "user"), ("spark.admin.acls.groups", "group")) { @@ -667,9 +666,9 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc // Test only history ui admin acls are configured. val conf2 = createTestConf() - .set("spark.history.ui.acls.enable", "true") - .set("spark.history.ui.admin.acls", "user1,user2") - .set("spark.history.ui.admin.acls.groups", "group1") + .set(UI_ACLS_ENABLE, true) + .set(UI_ADMIN_ACLS, "user1,user2") + .set(UI_ADMIN_ACLS_GROUPS, "group1") .set("spark.user.groups.mapping", classOf[TestGroupsMappingProvider].getName) createAndCheck(conf2) { securityManager => // Test whether user has permission to access UI. @@ -687,7 +686,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc // Test neither history ui admin acls nor application acls are configured. val conf3 = createTestConf() - .set("spark.history.ui.acls.enable", "true") + .set(UI_ACLS_ENABLE, true) .set("spark.user.groups.mapping", classOf[TestGroupsMappingProvider].getName) createAndCheck(conf3) { securityManager => // Test whether user has permission to access UI. 
@@ -1036,7 +1035,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc private def createTestConf(inMemory: Boolean = false): SparkConf = { val conf = new SparkConf() - .set(EVENT_LOG_DIR, testDir.getAbsolutePath()) + .set(HISTORY_LOG_DIR, testDir.getAbsolutePath()) .set(FAST_IN_PROGRESS_PARSING, true) if (!inMemory) { diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala index e89733a144cf..6b479873f69f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala @@ -22,21 +22,22 @@ import java.nio.charset.StandardCharsets._ import com.google.common.io.Files import org.apache.spark._ +import org.apache.spark.internal.config.History._ import org.apache.spark.util.Utils class HistoryServerArgumentsSuite extends SparkFunSuite { private val logDir = new File("src/test/resources/spark-events") private val conf = new SparkConf() - .set("spark.history.fs.logDirectory", logDir.getAbsolutePath) - .set("spark.history.fs.updateInterval", "1") + .set(HISTORY_LOG_DIR, logDir.getAbsolutePath) + .set(UPDATE_INTERVAL_S, 1L) .set("spark.testing", "true") test("No Arguments Parsing") { val argStrings = Array.empty[String] val hsa = new HistoryServerArguments(conf, argStrings) - assert(conf.get("spark.history.fs.logDirectory") === logDir.getAbsolutePath) - assert(conf.get("spark.history.fs.updateInterval") === "1") + assert(conf.get(HISTORY_LOG_DIR) === logDir.getAbsolutePath) + assert(conf.get(UPDATE_INTERVAL_S) === 1L) assert(conf.get("spark.testing") === "true") } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 2a2d013bacbd..a9dee67ae938 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -78,8 +78,8 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers Utils.deleteRecursively(storeDir) assert(storeDir.mkdir()) val conf = new SparkConf() - .set("spark.history.fs.logDirectory", logDir) - .set("spark.history.fs.update.interval", "0") + .set(HISTORY_LOG_DIR, logDir) + .set(UPDATE_INTERVAL_S.key, "0") .set("spark.testing", "true") .set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) .set("spark.eventLog.logStageExecutorMetrics.enabled", "true") @@ -416,11 +416,10 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers // allowed refresh rate (1Hz) stop() val myConf = new SparkConf() - .set("spark.history.fs.logDirectory", logDir.getAbsolutePath) + .set(HISTORY_LOG_DIR, logDir.getAbsolutePath) .set("spark.eventLog.dir", logDir.getAbsolutePath) - .set("spark.history.fs.update.interval", "1s") + .set(UPDATE_INTERVAL_S.key, "1s") .set("spark.eventLog.enabled", "true") - .set("spark.history.cache.window", "250ms") .set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) .remove("spark.testing") val provider = new FsHistoryProvider(myConf) @@ -613,8 +612,8 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers stop() init( "spark.ui.filters" -> classOf[FakeAuthFilter].getName(), - "spark.history.ui.acls.enable" -> "true", - "spark.history.ui.admin.acls" -> admin) + UI_ACLS_ENABLE.key -> "true", + 
UI_ADMIN_ACLS.key -> admin) val tests = Seq( (owner, HttpServletResponse.SC_OK), From 240817b7aea14d12f7764e17ab11073d14e8e6aa Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Sat, 29 Dec 2018 21:47:49 -0600 Subject: [PATCH 0200/1072] [SPARK-26363][WEBUI] Avoid duplicated KV store lookups in method `taskList` ## What changes were proposed in this pull request? In the method `taskList` (since https://github.com/apache/spark/pull/21688), the executor log value is queried in the KV store for every task (method `constructTaskData`). This PR proposes to use a hashmap to reduce duplicated KV store lookups in the method. ![image](https://user-images.githubusercontent.com/1097932/49946230-841c7680-ff29-11e8-8b83-d8f7553bfe5e.png) ## How was this patch tested? Manual check Closes #23310 from gengliangwang/removeExecutorLog. Authored-by: Gengliang Wang Signed-off-by: Sean Owen --- .../apache/spark/status/AppStatusStore.scala | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index 312bcccb1cca..0487f2f07c09 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -20,6 +20,7 @@ package org.apache.spark.status import java.util.{List => JList} import scala.collection.JavaConverters._ +import scala.collection.mutable.HashMap import org.apache.spark.{JobExecutionStatus, SparkConf} import org.apache.spark.status.api.v1 @@ -386,10 +387,9 @@ private[spark] class AppStatusStore( def taskList(stageId: Int, stageAttemptId: Int, maxTasks: Int): Seq[v1.TaskData] = { val stageKey = Array(stageId, stageAttemptId) - store.view(classOf[TaskDataWrapper]).index("stage").first(stageKey).last(stageKey).reverse() - .max(maxTasks).asScala.map { taskDataWrapper => - constructTaskData(taskDataWrapper) - }.toSeq.reverse + val taskDataWrapperIter = store.view(classOf[TaskDataWrapper]).index("stage") + .first(stageKey).last(stageKey).reverse().max(maxTasks).asScala + constructTaskDataList(taskDataWrapperIter).reverse } def taskList( @@ -428,9 +428,8 @@ private[spark] class AppStatusStore( } val ordered = if (ascending) indexed else indexed.reverse() - ordered.skip(offset).max(length).asScala.map { taskDataWrapper => - constructTaskData(taskDataWrapper) - }.toSeq + val taskDataWrapperIter = ordered.skip(offset).max(length).asScala + constructTaskDataList(taskDataWrapperIter) } def executorSummary(stageId: Int, attemptId: Int): Map[String, v1.ExecutorStageSummary] = { @@ -536,24 +535,29 @@ private[spark] class AppStatusStore( store.close() } - def constructTaskData(taskDataWrapper: TaskDataWrapper) : v1.TaskData = { - val taskDataOld: v1.TaskData = taskDataWrapper.toApi - val executorLogs: Option[Map[String, String]] = try { - Some(executorSummary(taskDataOld.executorId).executorLogs) - } catch { - case e: NoSuchElementException => e.getMessage - None - } - new v1.TaskData(taskDataOld.taskId, taskDataOld.index, - taskDataOld.attempt, taskDataOld.launchTime, taskDataOld.resultFetchStart, - taskDataOld.duration, taskDataOld.executorId, taskDataOld.host, taskDataOld.status, - taskDataOld.taskLocality, taskDataOld.speculative, taskDataOld.accumulatorUpdates, - taskDataOld.errorMessage, taskDataOld.taskMetrics, - executorLogs.getOrElse(Map[String, String]()), - AppStatusUtils.schedulerDelay(taskDataOld), - AppStatusUtils.gettingResultTime(taskDataOld)) + def 
constructTaskDataList(taskDataWrapperIter: Iterable[TaskDataWrapper]): Seq[v1.TaskData] = { + val executorIdToLogs = new HashMap[String, Map[String, String]]() + taskDataWrapperIter.map { taskDataWrapper => + val taskDataOld: v1.TaskData = taskDataWrapper.toApi + val executorLogs = executorIdToLogs.getOrElseUpdate(taskDataOld.executorId, { + try { + executorSummary(taskDataOld.executorId).executorLogs + } catch { + case e: NoSuchElementException => + Map.empty + } + }) + + new v1.TaskData(taskDataOld.taskId, taskDataOld.index, + taskDataOld.attempt, taskDataOld.launchTime, taskDataOld.resultFetchStart, + taskDataOld.duration, taskDataOld.executorId, taskDataOld.host, taskDataOld.status, + taskDataOld.taskLocality, taskDataOld.speculative, taskDataOld.accumulatorUpdates, + taskDataOld.errorMessage, taskDataOld.taskMetrics, + executorLogs, + AppStatusUtils.schedulerDelay(taskDataOld), + AppStatusUtils.gettingResultTime(taskDataOld)) + }.toSeq } - } private[spark] object AppStatusStore { From 0996b7c95a79fb018169ed1da7a8e3e482260838 Mon Sep 17 00:00:00 2001 From: seancxmao Date: Mon, 31 Dec 2018 08:24:18 -0600 Subject: [PATCH 0201/1072] [SPARK-23375][SQL][FOLLOWUP][TEST] Test Sort metrics while Sort is missing ## What changes were proposed in this pull request? #20560/[SPARK-23375](https://issues.apache.org/jira/browse/SPARK-23375) introduced an optimizer rule to eliminate redundant Sort. For a test case named "Sort metrics" in `SQLMetricsSuite`, because range is already sorted, sort is removed by the `RemoveRedundantSorts`, which makes this test case meaningless. This PR modifies the query for testing Sort metrics and checks Sort exists in the plan. ## How was this patch tested? Modify the existing test case. Closes #23258 from seancxmao/sort-metrics. Authored-by: seancxmao Signed-off-by: Sean Owen --- .../execution/metric/SQLMetricsSuite.scala | 18 ++++++-- .../metric/SQLMetricsTestUtils.scala | 43 ++++++++++++++++--- 2 files changed, 51 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 47265df4831d..7368a6c9e1d6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -194,10 +194,20 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared } test("Sort metrics") { - // Assume the execution plan is - // WholeStageCodegen(nodeId = 0, Range(nodeId = 2) -> Sort(nodeId = 1)) - val ds = spark.range(10).sort('id) - testSparkPlanMetrics(ds.toDF(), 2, Map.empty) + // Assume the execution plan with node id is + // Sort(nodeId = 0) + // Exchange(nodeId = 1) + // Project(nodeId = 2) + // LocalTableScan(nodeId = 3) + // Because of SPARK-25267, ConvertToLocalRelation is disabled in the test cases of sql/core, + // so Project here is not collapsed into LocalTableScan. 
+ val df = Seq(1, 3, 2).toDF("id").sort('id) + testSparkPlanMetricsWithPredicates(df, 2, Map( + 0L -> (("Sort", Map( + "sort time total (min, med, max)" -> {_.toString.matches(timingMetricPattern)}, + "peak memory total (min, med, max)" -> {_.toString.matches(sizeMetricPattern)}, + "spill size total (min, med, max)" -> {_.toString.matches(sizeMetricPattern)}))) + )) } test("SortMergeJoin metrics") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala index dcc540fc4f10..2d245d2ba1e3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala @@ -40,6 +40,18 @@ trait SQLMetricsTestUtils extends SQLTestUtils { protected def statusStore: SQLAppStatusStore = spark.sharedState.statusStore + // Pattern of size SQLMetric value, e.g. "\n96.2 MiB (32.1 MiB, 32.1 MiB, 32.1 MiB)" + protected val sizeMetricPattern = { + val bytes = "([0-9]+(\\.[0-9]+)?) (EiB|PiB|TiB|GiB|MiB|KiB|B)" + s"\\n$bytes \\($bytes, $bytes, $bytes\\)" + } + + // Pattern of timing SQLMetric value, e.g. "\n2.0 ms (1.0 ms, 1.0 ms, 1.0 ms)" + protected val timingMetricPattern = { + val duration = "([0-9]+(\\.[0-9]+)?) (ms|s|m|h)" + s"\\n$duration \\($duration, $duration, $duration\\)" + } + /** * Get execution metrics for the SQL execution and verify metrics values. * @@ -185,15 +197,34 @@ trait SQLMetricsTestUtils extends SQLTestUtils { df: DataFrame, expectedNumOfJobs: Int, expectedMetrics: Map[Long, (String, Map[String, Any])]): Unit = { - val optActualMetrics = getSparkPlanMetrics(df, expectedNumOfJobs, expectedMetrics.keySet) + val expectedMetricsPredicates = expectedMetrics.mapValues { case (nodeName, nodeMetrics) => + (nodeName, nodeMetrics.mapValues(expectedMetricValue => + (actualMetricValue: Any) => expectedMetricValue.toString === actualMetricValue)) + } + testSparkPlanMetricsWithPredicates(df, expectedNumOfJobs, expectedMetricsPredicates) + } + + /** + * Call `df.collect()` and verify if the collected metrics satisfy the specified predicates. + * @param df `DataFrame` to run + * @param expectedNumOfJobs number of jobs that will run + * @param expectedMetricsPredicates the expected metrics predicates. The format is + * `nodeId -> (operatorName, metric name -> metric predicate)`. 
+ */ + protected def testSparkPlanMetricsWithPredicates( + df: DataFrame, + expectedNumOfJobs: Int, + expectedMetricsPredicates: Map[Long, (String, Map[String, Any => Boolean])]): Unit = { + val optActualMetrics = + getSparkPlanMetrics(df, expectedNumOfJobs, expectedMetricsPredicates.keySet) optActualMetrics.foreach { actualMetrics => - assert(expectedMetrics.keySet === actualMetrics.keySet) - for (nodeId <- expectedMetrics.keySet) { - val (expectedNodeName, expectedMetricsMap) = expectedMetrics(nodeId) + assert(expectedMetricsPredicates.keySet === actualMetrics.keySet) + for ((nodeId, (expectedNodeName, expectedMetricsPredicatesMap)) + <- expectedMetricsPredicates) { val (actualNodeName, actualMetricsMap) = actualMetrics(nodeId) assert(expectedNodeName === actualNodeName) - for (metricName <- expectedMetricsMap.keySet) { - assert(expectedMetricsMap(metricName).toString === actualMetricsMap(metricName)) + for ((metricName, metricPredicate) <- expectedMetricsPredicatesMap) { + assert(metricPredicate(actualMetricsMap(metricName))) } } } From 89c92ccc2046d068aea23ae7973a97c58cfdc966 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 31 Dec 2018 16:39:46 +0100 Subject: [PATCH 0202/1072] [SPARK-26504][SQL] Rope-wise dumping of Spark plans ## What changes were proposed in this pull request? Proposed new class `StringConcat` for converting a sequence of strings to string with one memory allocation in the `toString` method. `StringConcat` replaces `StringBuilderWriter` in methods of dumping of Spark plans and codegen to strings. All `Writer` arguments are replaced by `String => Unit` in methods related to Spark plans stringification. ## How was this patch tested? It was tested by existing suites `QueryExecutionSuite`, `DebuggingSuite` as well as new tests for `StringConcat` in `StringUtilsSuite`. Closes #23406 from MaxGekk/rope-plan. Authored-by: Maxim Gekk Signed-off-by: Herman van Hovell --- .../spark/sql/catalyst/plans/QueryPlan.scala | 17 ++++ .../spark/sql/catalyst/trees/TreeNode.scala | 38 ++++----- .../spark/sql/catalyst/util/StringUtils.scala | 32 +++++++ .../sql/catalyst/util/StringUtilsSuite.scala | 13 +++ .../spark/sql/execution/QueryExecution.scala | 85 +++++++++---------- .../sql/execution/WholeStageCodegenExec.scala | 8 +- .../spark/sql/execution/debug/package.scala | 27 +++--- 7 files changed, 133 insertions(+), 87 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 125181fb213f..8f5444ed8a5a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.plans +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode} import org.apache.spark.sql.internal.SQLConf @@ -301,4 +302,20 @@ object QueryPlan extends PredicateHelper { Nil } } + + /** + * Converts the query plan to string and appends it via provided function. 
+ */ + def append[T <: QueryPlan[T]]( + plan: => QueryPlan[T], + append: String => Unit, + verbose: Boolean, + addSuffix: Boolean, + maxFields: Int = SQLConf.get.maxToStringFields): Unit = { + try { + plan.treeString(append, verbose, addSuffix, maxFields) + } catch { + case e: AnalysisException => append(e.toString) + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 21e59bbd283e..570a019b2af7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -17,13 +17,11 @@ package org.apache.spark.sql.catalyst.trees -import java.io.Writer import java.util.UUID import scala.collection.Map import scala.reflect.ClassTag -import org.apache.commons.io.output.StringBuilderWriter import org.apache.commons.lang3.ClassUtils import org.json4s.JsonAST._ import org.json4s.JsonDSL._ @@ -37,6 +35,7 @@ import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode, Partitioning} +import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -481,21 +480,18 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { verbose: Boolean, addSuffix: Boolean = false, maxFields: Int = SQLConf.get.maxToStringFields): String = { - val writer = new StringBuilderWriter() - try { - treeString(writer, verbose, addSuffix, maxFields) - writer.toString - } finally { - writer.close() - } + val concat = new StringConcat() + + treeString(concat.append, verbose, addSuffix, maxFields) + concat.toString } def treeString( - writer: Writer, + append: String => Unit, verbose: Boolean, addSuffix: Boolean, maxFields: Int): Unit = { - generateTreeString(0, Nil, writer, verbose, "", addSuffix, maxFields) + generateTreeString(0, Nil, append, verbose, "", addSuffix, maxFields) } /** @@ -558,7 +554,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { def generateTreeString( depth: Int, lastChildren: Seq[Boolean], - writer: Writer, + append: String => Unit, verbose: Boolean, prefix: String = "", addSuffix: Boolean = false, @@ -566,9 +562,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { if (depth > 0) { lastChildren.init.foreach { isLast => - writer.write(if (isLast) " " else ": ") + append(if (isLast) " " else ": ") } - writer.write(if (lastChildren.last) "+- " else ":- ") + append(if (lastChildren.last) "+- " else ":- ") } val str = if (verbose) { @@ -576,24 +572,24 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { } else { simpleString(maxFields) } - writer.write(prefix) - writer.write(str) - writer.write("\n") + append(prefix) + append(str) + append("\n") if (innerChildren.nonEmpty) { innerChildren.init.foreach(_.generateTreeString( - depth + 2, lastChildren :+ children.isEmpty :+ false, writer, verbose, + depth + 2, lastChildren :+ children.isEmpty :+ false, append, verbose, addSuffix = addSuffix, maxFields = maxFields)) innerChildren.last.generateTreeString( - depth + 2, lastChildren :+ children.isEmpty :+ true, writer, verbose, + depth + 2, lastChildren :+ children.isEmpty :+ true, append, 
verbose, addSuffix = addSuffix, maxFields = maxFields) } if (children.nonEmpty) { children.init.foreach(_.generateTreeString( - depth + 1, lastChildren :+ false, writer, verbose, prefix, addSuffix, maxFields)) + depth + 1, lastChildren :+ false, append, verbose, prefix, addSuffix, maxFields)) children.last.generateTreeString( - depth + 1, lastChildren :+ true, writer, verbose, prefix, addSuffix, maxFields) + depth + 1, lastChildren :+ true, append, verbose, prefix, addSuffix, maxFields) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index bc861a805ce6..643b83b1741a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.util import java.util.regex.{Pattern, PatternSyntaxException} +import scala.collection.mutable.ArrayBuffer + import org.apache.spark.sql.AnalysisException import org.apache.spark.unsafe.types.UTF8String @@ -87,4 +89,34 @@ object StringUtils { } funcNames.toSeq } + + /** + * Concatenation of sequence of strings to final string with cheap append method + * and one memory allocation for the final string. + */ + class StringConcat { + private val strings = new ArrayBuffer[String] + private var length: Int = 0 + + /** + * Appends a string and accumulates its length to allocate a string buffer for all + * appended strings once in the toString method. + */ + def append(s: String): Unit = { + if (s != null) { + strings.append(s) + length += s.length + } + } + + /** + * The method allocates memory for all appended strings, writes them to the memory and + * returns concatenated string. + */ + override def toString: String = { + val result = new java.lang.StringBuilder(length) + strings.foreach(result.append) + result.toString + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala index 78fee5135c3a..616ec12032db 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala @@ -43,4 +43,17 @@ class StringUtilsSuite extends SparkFunSuite { assert(filterPattern(names, " a. 
") === Seq("a1", "a2")) assert(filterPattern(names, " d* ") === Nil) } + + test("string concatenation") { + def concat(seq: String*): String = { + seq.foldLeft(new StringConcat())((acc, s) => {acc.append(s); acc}).toString + } + + assert(new StringConcat().toString == "") + assert(concat("") == "") + assert(concat(null) == "") + assert(concat("a") == "a") + assert(concat("1", "2") == "12") + assert(concat("abc", "\n", "123") == "abc\n123") + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 9b8d2e830867..7fccbf65d852 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -17,20 +17,21 @@ package org.apache.spark.sql.execution -import java.io.{BufferedWriter, OutputStreamWriter, Writer} +import java.io.{BufferedWriter, OutputStreamWriter} import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} -import org.apache.commons.io.output.StringBuilderWriter import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.{InternalRow, QueryPlanningTracker} import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker +import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.command.{DescribeTableCommand, ExecutedCommandExec, ShowTablesCommand} import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReuseExchange} @@ -108,10 +109,6 @@ class QueryExecution( ReuseExchange(sparkSession.sessionState.conf), ReuseSubquery(sparkSession.sessionState.conf)) - protected def stringOrError[A](f: => A): String = - try f.toString catch { case e: AnalysisException => e.toString } - - /** * Returns the result as a hive compatible sequence of strings. This is used in tests and * `SparkSQLDriver` for CLI applications. 
@@ -197,55 +194,53 @@ class QueryExecution( } def simpleString: String = withRedaction { - s"""== Physical Plan == - |${stringOrError(executedPlan.treeString(verbose = false))} - """.stripMargin.trim - } - - private def writeOrError(writer: Writer)(f: Writer => Unit): Unit = { - try f(writer) - catch { - case e: AnalysisException => writer.write(e.toString) - } + val concat = new StringConcat() + concat.append("== Physical Plan ==\n") + QueryPlan.append(executedPlan, concat.append, verbose = false, addSuffix = false) + concat.append("\n") + concat.toString } - private def writePlans(writer: Writer, maxFields: Int): Unit = { + private def writePlans(append: String => Unit, maxFields: Int): Unit = { val (verbose, addSuffix) = (true, false) - - writer.write("== Parsed Logical Plan ==\n") - writeOrError(writer)(logical.treeString(_, verbose, addSuffix, maxFields)) - writer.write("\n== Analyzed Logical Plan ==\n") - val analyzedOutput = stringOrError(truncatedString( - analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}"), ", ", maxFields)) - writer.write(analyzedOutput) - writer.write("\n") - writeOrError(writer)(analyzed.treeString(_, verbose, addSuffix, maxFields)) - writer.write("\n== Optimized Logical Plan ==\n") - writeOrError(writer)(optimizedPlan.treeString(_, verbose, addSuffix, maxFields)) - writer.write("\n== Physical Plan ==\n") - writeOrError(writer)(executedPlan.treeString(_, verbose, addSuffix, maxFields)) + append("== Parsed Logical Plan ==\n") + QueryPlan.append(logical, append, verbose, addSuffix, maxFields) + append("\n== Analyzed Logical Plan ==\n") + val analyzedOutput = try { + truncatedString( + analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}"), ", ", maxFields) + } catch { + case e: AnalysisException => e.toString + } + append(analyzedOutput) + append("\n") + QueryPlan.append(analyzed, append, verbose, addSuffix, maxFields) + append("\n== Optimized Logical Plan ==\n") + QueryPlan.append(optimizedPlan, append, verbose, addSuffix, maxFields) + append("\n== Physical Plan ==\n") + QueryPlan.append(executedPlan, append, verbose, addSuffix, maxFields) } override def toString: String = withRedaction { - val writer = new StringBuilderWriter() - try { - writePlans(writer, SQLConf.get.maxToStringFields) - writer.toString - } finally { - writer.close() - } + val concat = new StringConcat() + writePlans(concat.append, SQLConf.get.maxToStringFields) + concat.toString } def stringWithStats: String = withRedaction { + val concat = new StringConcat() + val maxFields = SQLConf.get.maxToStringFields + // trigger to compute stats for logical plans optimizedPlan.stats // only show optimized logical plan and physical plan - s"""== Optimized Logical Plan == - |${stringOrError(optimizedPlan.treeString(verbose = true, addSuffix = true))} - |== Physical Plan == - |${stringOrError(executedPlan.treeString(verbose = true))} - """.stripMargin.trim + concat.append("== Optimized Logical Plan ==\n") + QueryPlan.append(optimizedPlan, concat.append, verbose = true, addSuffix = true, maxFields) + concat.append("\n== Physical Plan ==\n") + QueryPlan.append(executedPlan, concat.append, verbose = true, addSuffix = false, maxFields) + concat.append("\n") + concat.toString } /** @@ -282,7 +277,7 @@ class QueryExecution( /** * Dumps debug information about query execution into the specified file. * - * @param maxFields maximim number of fields converted to string representation. + * @param maxFields maximum number of fields converted to string representation. 
*/ def toFile(path: String, maxFields: Int = Int.MaxValue): Unit = { val filePath = new Path(path) @@ -290,9 +285,9 @@ class QueryExecution( val writer = new BufferedWriter(new OutputStreamWriter(fs.create(filePath))) try { - writePlans(writer, maxFields) + writePlans(writer.write, maxFields) writer.write("\n== Whole Stage Codegen ==\n") - org.apache.spark.sql.execution.debug.writeCodegen(writer, executedPlan) + org.apache.spark.sql.execution.debug.writeCodegen(writer.write, executedPlan) } finally { writer.close() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index f4927dedabe5..3b0a99669ccd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -493,7 +493,7 @@ case class InputAdapter(child: SparkPlan) extends UnaryExecNode with InputRDDCod override def generateTreeString( depth: Int, lastChildren: Seq[Boolean], - writer: Writer, + append: String => Unit, verbose: Boolean, prefix: String = "", addSuffix: Boolean = false, @@ -501,7 +501,7 @@ case class InputAdapter(child: SparkPlan) extends UnaryExecNode with InputRDDCod child.generateTreeString( depth, lastChildren, - writer, + append, verbose, prefix = "", addSuffix = false, @@ -777,7 +777,7 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) override def generateTreeString( depth: Int, lastChildren: Seq[Boolean], - writer: Writer, + append: String => Unit, verbose: Boolean, prefix: String = "", addSuffix: Boolean = false, @@ -785,7 +785,7 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) child.generateTreeString( depth, lastChildren, - writer, + append, verbose, s"*($codegenStageId) ", false, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala index ae8197f617a2..53b74c7c8559 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala @@ -17,13 +17,10 @@ package org.apache.spark.sql.execution -import java.io.Writer import java.util.Collections import scala.collection.JavaConverters._ -import org.apache.commons.io.output.StringBuilderWriter - import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql._ @@ -32,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.trees.TreeNodeRef +import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.execution.streaming.{StreamExecution, StreamingQueryWrapper} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.StreamingQuery @@ -73,24 +71,19 @@ package object debug { * @return single String containing all WholeStageCodegen subtrees and corresponding codegen */ def codegenString(plan: SparkPlan): String = { - val writer = new StringBuilderWriter() - - try { - writeCodegen(writer, plan) - writer.toString - } finally { - writer.close() - } + val concat = new StringConcat() + writeCodegen(concat.append, plan) + concat.toString } - def 
writeCodegen(writer: Writer, plan: SparkPlan): Unit = { + def writeCodegen(append: String => Unit, plan: SparkPlan): Unit = { val codegenSeq = codegenStringSeq(plan) - writer.write(s"Found ${codegenSeq.size} WholeStageCodegen subtrees.\n") + append(s"Found ${codegenSeq.size} WholeStageCodegen subtrees.\n") for (((subtree, code), i) <- codegenSeq.zipWithIndex) { - writer.write(s"== Subtree ${i + 1} / ${codegenSeq.size} ==\n") - writer.write(subtree) - writer.write("\nGenerated code:\n") - writer.write(s"${code}\n") + append(s"== Subtree ${i + 1} / ${codegenSeq.size} ==\n") + append(subtree) + append("\nGenerated code:\n") + append(s"${code}\n") } } From c0b9db120d4c2ad0b5b99b9152549e94ef8f5a2d Mon Sep 17 00:00:00 2001 From: Hirobe Keiichi Date: Mon, 31 Dec 2018 10:15:14 -0600 Subject: [PATCH 0203/1072] [SPARK-26339][SQL] Throws better exception when reading files that start with underscore ## What changes were proposed in this pull request? As described in SPARK-26339, spark.read behavior is very confusing when reading files that start with an underscore; this PR fixes it by throwing an exception whose message is "Path does not exist". ## How was this patch tested? Manual tests. Both of the code snippets below throw an exception whose message is "Path does not exist". ``` spark.read.csv("/home/forcia/work/spark/_test.csv") spark.read.schema("test STRING, number INT").csv("/home/forcia/work/spark/_test.csv") ``` Closes #23288 from KeiichiHirobe/SPARK-26339. Authored-by: Hirobe Keiichi Signed-off-by: Sean Owen --- .../execution/datasources/DataSource.scala | 17 +++++++++++++++- .../src/test/resources/test-data/_cars.csv | 7 +++++++ .../execution/datasources/csv/CSVSuite.scala | 20 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/test/resources/test-data/_cars.csv diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index fefff68c4ba8..517e04317d94 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -543,7 +543,7 @@ case class DataSource( checkFilesExist: Boolean): Seq[Path] = { val allPaths = caseInsensitiveOptions.get("path") ++ paths val hadoopConf = sparkSession.sessionState.newHadoopConf() - allPaths.flatMap { path => + val allGlobPath = allPaths.flatMap { path => val hdfsPath = new Path(path) val fs = hdfsPath.getFileSystem(hadoopConf) val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory) @@ -560,6 +560,21 @@ case class DataSource( } globPath }.toSeq + + val (filteredOut, filteredIn) = allGlobPath.partition { path => + InMemoryFileIndex.shouldFilterOut(path.getName) + } + if (filteredOut.nonEmpty) { + if (filteredIn.isEmpty) { + throw new AnalysisException( + s"All paths were ignored:\n${filteredOut.mkString("\n ")}") + } else { + logDebug( + s"Some paths were ignored:\n${filteredOut.mkString("\n ")}") + } + } + + allGlobPath } } diff --git a/sql/core/src/test/resources/test-data/_cars.csv b/sql/core/src/test/resources/test-data/_cars.csv new file mode 100644 index 000000000000..40ded573ade5 --- /dev/null +++ b/sql/core/src/test/resources/test-data/_cars.csv @@ -0,0 +1,7 @@ + +year,make,model,comment,blank +"2012","Tesla","S","No comment", + +1997,Ford,E350,"Go get one now they are going fast", +2015,Chevy,Volt + diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index d9e5d7af1967..fb1bedfaa32c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -53,6 +53,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te private val carsEmptyValueFile = "test-data/cars-empty-value.csv" private val carsBlankColName = "test-data/cars-blank-column-name.csv" private val carsCrlf = "test-data/cars-crlf.csv" + private val carsFilteredOutFile = "test-data/_cars.csv" private val emptyFile = "test-data/empty.csv" private val commentsFile = "test-data/comments.csv" private val disableCommentsFile = "test-data/disable_comments.csv" @@ -346,6 +347,25 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te assert(result.schema.fieldNames.size === 1) } + test("SPARK-26339 Not throw an exception if some of specified paths are filtered in") { + val cars = spark + .read + .option("header", "false") + .csv(testFile(carsFile), testFile(carsFilteredOutFile)) + + verifyCars(cars, withHeader = false, checkTypes = false) + } + + test("SPARK-26339 Throw an exception only if all of the specified paths are filtered out") { + val e = intercept[AnalysisException] { + val cars = spark + .read + .option("header", "false") + .csv(testFile(carsFilteredOutFile)) + }.getMessage + assert(e.contains("All paths were ignored:")) + } + test("DDL test with empty file") { withView("carsTable") { spark.sql( From c0368363f8a81dd739c6c90fb2849b2a3ab4d8e4 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Mon, 31 Dec 2018 17:46:06 +0100 Subject: [PATCH 0204/1072] [SPARK-26495][SQL] Simplify the SelectedField extractor. ## What changes were proposed in this pull request? The current `SelectedField` extractor is somewhat complicated and it seems to be handling cases that should be handled automatically: - `GetArrayItem(child: GetStructFieldObject())` - `GetArrayStructFields(child: GetArrayStructFields())` - `GetMap(value: GetStructFieldObject())` This PR removes those cases and simplifies the extractor by passing down the data type instead of a field. ## How was this patch tested? Existing tests. Closes #23397 from hvanhovell/SPARK-26495. Authored-by: Herman van Hovell Signed-off-by: Herman van Hovell --- .../spark/sql/execution/SelectedField.scala | 103 +++++++----------- 1 file changed, 41 insertions(+), 62 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SelectedField.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SelectedField.scala index 0e7c593f9fb6..68f797a856a1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SelectedField.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SelectedField.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ @@ -51,8 +52,6 @@ import org.apache.spark.sql.types._ * type appropriate to the complex type extractor. In our example, the name of the child expression * is "name" and its data type is a [[org.apache.spark.sql.types.StructType]] with a single string * field named "first". 
- * - * @param expr the top-level complex type extractor */ private[execution] object SelectedField { def unapply(expr: Expression): Option[StructField] = { @@ -64,71 +63,51 @@ private[execution] object SelectedField { selectField(unaliased, None) } - private def selectField(expr: Expression, fieldOpt: Option[StructField]): Option[StructField] = { + /** + * Convert an expression into the parts of the schema (the field) it accesses. + */ + private def selectField(expr: Expression, dataTypeOpt: Option[DataType]): Option[StructField] = { expr match { - // No children. Returns a StructField with the attribute name or None if fieldOpt is None. - case AttributeReference(name, dataType, nullable, metadata) => - fieldOpt.map(field => - StructField(name, wrapStructType(dataType, field), nullable, metadata)) - // Handles case "expr0.field[n]", where "expr0" is of struct type and "expr0.field" is of - // array type. - case GetArrayItem(x @ GetStructFieldObject(child, field @ StructField(name, - dataType, nullable, metadata)), _) => - val childField = fieldOpt.map(field => StructField(name, - wrapStructType(dataType, field), nullable, metadata)).getOrElse(field) - selectField(child, Some(childField)) - // Handles case "expr0.field[n]", where "expr0.field" is of array type. - case GetArrayItem(child, _) => - selectField(child, fieldOpt) - // Handles case "expr0.field.subfield", where "expr0" and "expr0.field" are of array type. - case GetArrayStructFields(child: GetArrayStructFields, - field @ StructField(name, dataType, nullable, metadata), _, _, _) => - val childField = fieldOpt.map(field => StructField(name, - wrapStructType(dataType, field), - nullable, metadata)).orElse(Some(field)) - selectField(child, childField) - // Handles case "expr0.field", where "expr0" is of array type. - case GetArrayStructFields(child, - field @ StructField(name, dataType, nullable, metadata), _, _, _) => - val childField = - fieldOpt.map(field => StructField(name, - wrapStructType(dataType, field), - nullable, metadata)).orElse(Some(field)) - selectField(child, childField) - // Handles case "expr0.field[key]", where "expr0" is of struct type and "expr0.field" is of - // map type. - case GetMapValue(x @ GetStructFieldObject(child, field @ StructField(name, - dataType, - nullable, metadata)), _) => - val childField = fieldOpt.map(field => StructField(name, - wrapStructType(dataType, field), - nullable, metadata)).orElse(Some(field)) - selectField(child, childField) - // Handles case "expr0.field[key]", where "expr0.field" is of map type. + case a: Attribute => + dataTypeOpt.map { dt => + StructField(a.name, dt, a.nullable) + } + case c: GetStructField => + val field = c.childSchema(c.ordinal) + val newField = field.copy(dataType = dataTypeOpt.getOrElse(field.dataType)) + selectField(c.child, Option(struct(newField))) + case GetArrayStructFields(child, field, _, _, containsNull) => + val newFieldDataType = dataTypeOpt match { + case None => + // GetArrayStructFields is the top level extractor. This means its result is + // not pruned and we need to use the element type of the array its producing. + field.dataType + case Some(ArrayType(dataType, _)) => + // GetArrayStructFields is part of a chain of extractors and its result is pruned + // by a parent expression. In this case need to use the parent element type. + dataType + case Some(x) => + // This should not happen. 
+ throw new AnalysisException(s"DataType '$x' is not supported by GetArrayStructFields.") + } + val newField = StructField(field.name, newFieldDataType, field.nullable) + selectField(child, Option(ArrayType(struct(newField), containsNull))) case GetMapValue(child, _) => - selectField(child, fieldOpt) - // Handles case "expr0.field", where expr0 is of struct type. - case GetStructFieldObject(child, - field @ StructField(name, dataType, nullable, metadata)) => - val childField = fieldOpt.map(field => StructField(name, - wrapStructType(dataType, field), - nullable, metadata)).orElse(Some(field)) - selectField(child, childField) + // GetMapValue does not select a field from a struct (i.e. prune the struct) so it can't be + // the top-level extractor. However it can be part of an extractor chain. + val MapType(keyType, _, valueContainsNull) = child.dataType + val opt = dataTypeOpt.map(dt => MapType(keyType, dt, valueContainsNull)) + selectField(child, opt) + case GetArrayItem(child, _) => + // GetArrayItem does not select a field from a struct (i.e. prune the struct) so it can't be + // the top-level extractor. However it can be part of an extractor chain. + val ArrayType(_, containsNull) = child.dataType + val opt = dataTypeOpt.map(dt => ArrayType(dt, containsNull)) + selectField(child, opt) case _ => None } } - // Constructs a composition of complex types with a StructType(Array(field)) at its core. Returns - // a StructType for a StructType, an ArrayType for an ArrayType and a MapType for a MapType. - private def wrapStructType(dataType: DataType, field: StructField): DataType = { - dataType match { - case _: StructType => - StructType(Array(field)) - case ArrayType(elementType, containsNull) => - ArrayType(wrapStructType(elementType, field), containsNull) - case MapType(keyType, valueType, valueContainsNull) => - MapType(keyType, wrapStructType(valueType, field), valueContainsNull) - } - } + private def struct(field: StructField): StructType = StructType(Array(field)) } From b1a9b5eff59f64c370cd7388761effdf2152a108 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Mon, 31 Dec 2018 13:35:02 -0800 Subject: [PATCH 0205/1072] [SPARK-26470][CORE] Use ConfigEntry for hardcoded configs for eventLog category ## What changes were proposed in this pull request? The PR makes hardcoded `spark.eventLog` configs to use `ConfigEntry` and put them in the `config` package. ## How was this patch tested? existing tests Closes #23395 from mgaido91/SPARK-26470. 
Authored-by: Marco Gaido Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/SparkContext.scala | 7 +++---- .../apache/spark/internal/config/package.scala | 9 +++++++++ .../spark/deploy/history/HistoryServerSuite.scala | 9 +++++---- .../scheduler/EventLoggingListenerSuite.scala | 15 ++++++++------- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 09cc346db0ed..3475859c3ed6 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -230,7 +230,7 @@ class SparkContext(config: SparkConf) extends Logging { def deployMode: String = _conf.getOption("spark.submit.deployMode").getOrElse("client") def appName: String = _conf.get("spark.app.name") - private[spark] def isEventLogEnabled: Boolean = _conf.getBoolean("spark.eventLog.enabled", false) + private[spark] def isEventLogEnabled: Boolean = _conf.get(EVENT_LOG_ENABLED) private[spark] def eventLogDir: Option[URI] = _eventLogDir private[spark] def eventLogCodec: Option[String] = _eventLogCodec @@ -396,15 +396,14 @@ class SparkContext(config: SparkConf) extends Logging { _eventLogDir = if (isEventLogEnabled) { - val unresolvedDir = conf.get("spark.eventLog.dir", EventLoggingListener.DEFAULT_LOG_DIR) - .stripSuffix("/") + val unresolvedDir = conf.get(EVENT_LOG_DIR).stripSuffix("/") Some(Utils.resolveURI(unresolvedDir)) } else { None } _eventLogCodec = { - val compress = _conf.getBoolean("spark.eventLog.compress", false) + val compress = _conf.get(EVENT_LOG_COMPRESS) if (compress && isEventLogEnabled) { Some(CompressionCodec.getCodecName(_conf)).map(CompressionCodec.getShortName) } else { diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index f1c1c034df49..d8e9c099028f 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -21,6 +21,7 @@ import java.util.concurrent.TimeUnit import org.apache.spark.launcher.SparkLauncher import org.apache.spark.network.util.ByteUnit +import org.apache.spark.scheduler.EventLoggingListener import org.apache.spark.unsafe.array.ByteArrayMethods import org.apache.spark.util.Utils @@ -62,6 +63,14 @@ package object config { .booleanConf .createWithDefault(false) + private[spark] val EVENT_LOG_ENABLED = ConfigBuilder("spark.eventLog.enabled") + .booleanConf + .createWithDefault(false) + + private[spark] val EVENT_LOG_DIR = ConfigBuilder("spark.eventLog.dir") + .stringConf + .createWithDefault(EventLoggingListener.DEFAULT_LOG_DIR) + private[spark] val EVENT_LOG_COMPRESS = ConfigBuilder("spark.eventLog.compress") .booleanConf diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index a9dee67ae938..96458c55b5f5 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -45,6 +45,7 @@ import org.scalatest.mockito.MockitoSugar import org.scalatest.selenium.WebBrowser import org.apache.spark._ +import org.apache.spark.internal.config._ import org.apache.spark.internal.config.History._ import org.apache.spark.status.api.v1.ApplicationInfo import org.apache.spark.status.api.v1.JobData @@ -82,8 
+83,8 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers .set(UPDATE_INTERVAL_S.key, "0") .set("spark.testing", "true") .set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) - .set("spark.eventLog.logStageExecutorMetrics.enabled", "true") - .set("spark.eventLog.logStageExecutorProcessTreeMetrics.enabled", "true") + .set(EVENT_LOG_STAGE_EXECUTOR_METRICS, true) + .set(EVENT_LOG_PROCESS_TREE_METRICS, true) conf.setAll(extraConf) provider = new FsHistoryProvider(conf) provider.checkForLogs() @@ -417,9 +418,9 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers stop() val myConf = new SparkConf() .set(HISTORY_LOG_DIR, logDir.getAbsolutePath) - .set("spark.eventLog.dir", logDir.getAbsolutePath) + .set(EVENT_LOG_DIR, logDir.getAbsolutePath) .set(UPDATE_INTERVAL_S.key, "1s") - .set("spark.eventLog.enabled", "true") + .set(EVENT_LOG_ENABLED, true) .set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) .remove("spark.testing") val provider = new FsHistoryProvider(myConf) diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index 0c04a93646d7..04987e6ef79e 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -33,6 +33,7 @@ import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ import org.apache.spark.io._ import org.apache.spark.metrics.{ExecutorMetricType, MetricsSystem} import org.apache.spark.scheduler.cluster.ExecutorInfo @@ -122,7 +123,7 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit // Expected IOException, since we haven't enabled log overwrite. intercept[IOException] { testEventLogging() } // Try again, but enable overwriting. - testEventLogging(extraConf = Map("spark.eventLog.overwrite" -> "true")) + testEventLogging(extraConf = Map(EVENT_LOG_OVERWRITE.key -> "true")) } test("Event log name") { @@ -526,15 +527,15 @@ object EventLoggingListenerSuite { /** Get a SparkConf with event logging enabled. */ def getLoggingConf(logDir: Path, compressionCodec: Option[String] = None): SparkConf = { val conf = new SparkConf - conf.set("spark.eventLog.enabled", "true") - conf.set("spark.eventLog.logBlockUpdates.enabled", "true") - conf.set("spark.eventLog.testing", "true") - conf.set("spark.eventLog.dir", logDir.toString) + conf.set(EVENT_LOG_ENABLED, true) + conf.set(EVENT_LOG_BLOCK_UPDATES, true) + conf.set(EVENT_LOG_TESTING, true) + conf.set(EVENT_LOG_DIR, logDir.toString) compressionCodec.foreach { codec => - conf.set("spark.eventLog.compress", "true") + conf.set(EVENT_LOG_COMPRESS, true) conf.set("spark.io.compression.codec", codec) } - conf.set("spark.eventLog.logStageExecutorMetrics.enabled", "true") + conf.set(EVENT_LOG_STAGE_EXECUTOR_METRICS, true) conf } From 993736154b6a46ffd7c3218173a2653a3842bba0 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Tue, 1 Jan 2019 09:14:23 +0800 Subject: [PATCH 0206/1072] [MINOR] Fix inconsistency log level among delegation token providers ## What changes were proposed in this pull request? There's some inconsistency for log level while logging error messages in delegation token providers. 
(DEBUG, INFO, WARNING) Given that failing to obtain token would often crash the query, I guess it would be nice to set higher log level for error log messages. ## How was this patch tested? The patch just changed the log level. Closes #23418 from HeartSaVioR/FIX-inconsistency-log-level-between-delegation-token-providers. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Hyukjin Kwon --- .../HBaseDelegationTokenProvider.scala | 4 +- .../HadoopFSDelegationTokenProvider.scala | 45 +++++++++++-------- .../HiveDelegationTokenProvider.scala | 4 +- .../KafkaDelegationTokenProvider.scala | 2 +- 4 files changed, 31 insertions(+), 24 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HBaseDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HBaseDelegationTokenProvider.scala index 5dcde4ec3a8a..6ef68351bc9b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HBaseDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HBaseDelegationTokenProvider.scala @@ -50,7 +50,7 @@ private[security] class HBaseDelegationTokenProvider creds.addToken(token.getService, token) } catch { case NonFatal(e) => - logDebug(s"Failed to get token from service $serviceName", e) + logWarning(s"Failed to get token from service $serviceName", e) } None @@ -71,7 +71,7 @@ private[security] class HBaseDelegationTokenProvider confCreate.invoke(null, conf).asInstanceOf[Configuration] } catch { case NonFatal(e) => - logDebug("Fail to invoke HBaseConfiguration", e) + logWarning("Fail to invoke HBaseConfiguration", e) conf } } diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala index 767b5521e8d7..00200f807d22 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.security import scala.collection.JavaConverters._ import scala.util.Try +import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.FileSystem @@ -44,28 +45,34 @@ private[deploy] class HadoopFSDelegationTokenProvider(fileSystems: () => Set[Fil hadoopConf: Configuration, sparkConf: SparkConf, creds: Credentials): Option[Long] = { - val fsToGetTokens = fileSystems() - val fetchCreds = fetchDelegationTokens(getTokenRenewer(hadoopConf), fsToGetTokens, creds) + try { + val fsToGetTokens = fileSystems() + val fetchCreds = fetchDelegationTokens(getTokenRenewer(hadoopConf), fsToGetTokens, creds) - // Get the token renewal interval if it is not set. It will only be called once. - if (tokenRenewalInterval == null) { - tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf, fsToGetTokens) - } + // Get the token renewal interval if it is not set. It will only be called once. + if (tokenRenewalInterval == null) { + tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf, fsToGetTokens) + } - // Get the time of next renewal. 
- val nextRenewalDate = tokenRenewalInterval.flatMap { interval => - val nextRenewalDates = fetchCreds.getAllTokens.asScala - .filter(_.decodeIdentifier().isInstanceOf[AbstractDelegationTokenIdentifier]) - .map { token => - val identifier = token - .decodeIdentifier() - .asInstanceOf[AbstractDelegationTokenIdentifier] - identifier.getIssueDate + interval - } - if (nextRenewalDates.isEmpty) None else Some(nextRenewalDates.min) - } + // Get the time of next renewal. + val nextRenewalDate = tokenRenewalInterval.flatMap { interval => + val nextRenewalDates = fetchCreds.getAllTokens.asScala + .filter(_.decodeIdentifier().isInstanceOf[AbstractDelegationTokenIdentifier]) + .map { token => + val identifier = token + .decodeIdentifier() + .asInstanceOf[AbstractDelegationTokenIdentifier] + identifier.getIssueDate + interval + } + if (nextRenewalDates.isEmpty) None else Some(nextRenewalDates.min) + } - nextRenewalDate + nextRenewalDate + } catch { + case NonFatal(e) => + logWarning(s"Failed to get token from service $serviceName", e) + None + } } override def delegationTokensRequired( diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala index 7249eb85ac7c..90f705138157 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala @@ -49,7 +49,7 @@ private[spark] class HiveDelegationTokenProvider new HiveConf(hadoopConf, classOf[HiveConf]) } catch { case NonFatal(e) => - logDebug("Fail to create Hive Configuration", e) + logWarning("Fail to create Hive Configuration", e) hadoopConf case e: NoClassDefFoundError => logWarning(classNotFoundErrorStr) @@ -104,7 +104,7 @@ private[spark] class HiveDelegationTokenProvider None } catch { case NonFatal(e) => - logDebug(s"Failed to get token from service $serviceName", e) + logWarning(s"Failed to get token from service $serviceName", e) None case e: NoClassDefFoundError => logWarning(classNotFoundErrorStr) diff --git a/core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala index 45995be630cc..f67cb26259fe 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/KafkaDelegationTokenProvider.scala @@ -44,7 +44,7 @@ private[security] class KafkaDelegationTokenProvider return Some(nextRenewalDate) } catch { case NonFatal(e) => - logInfo(s"Failed to get token from service $serviceName", e) + logWarning(s"Failed to get token from service $serviceName", e) } None } From f7455618ce6de8d2e70f10722dc112fcc6ee3cee Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 1 Jan 2019 09:29:28 +0800 Subject: [PATCH 0207/1072] Revert "[SPARK-26339][SQL] Throws better exception when reading files that start with underscore" This reverts commit c0b9db120d4c2ad0b5b99b9152549e94ef8f5a2d. 
--- .../execution/datasources/DataSource.scala | 17 +--------------- .../src/test/resources/test-data/_cars.csv | 7 ------- .../execution/datasources/csv/CSVSuite.scala | 20 ------------------- 3 files changed, 1 insertion(+), 43 deletions(-) delete mode 100644 sql/core/src/test/resources/test-data/_cars.csv diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 517e04317d94..fefff68c4ba8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -543,7 +543,7 @@ case class DataSource( checkFilesExist: Boolean): Seq[Path] = { val allPaths = caseInsensitiveOptions.get("path") ++ paths val hadoopConf = sparkSession.sessionState.newHadoopConf() - val allGlobPath = allPaths.flatMap { path => + allPaths.flatMap { path => val hdfsPath = new Path(path) val fs = hdfsPath.getFileSystem(hadoopConf) val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory) @@ -560,21 +560,6 @@ case class DataSource( } globPath }.toSeq - - val (filteredOut, filteredIn) = allGlobPath.partition { path => - InMemoryFileIndex.shouldFilterOut(path.getName) - } - if (filteredOut.nonEmpty) { - if (filteredIn.isEmpty) { - throw new AnalysisException( - s"All paths were ignored:\n${filteredOut.mkString("\n ")}") - } else { - logDebug( - s"Some paths were ignored:\n${filteredOut.mkString("\n ")}") - } - } - - allGlobPath } } diff --git a/sql/core/src/test/resources/test-data/_cars.csv b/sql/core/src/test/resources/test-data/_cars.csv deleted file mode 100644 index 40ded573ade5..000000000000 --- a/sql/core/src/test/resources/test-data/_cars.csv +++ /dev/null @@ -1,7 +0,0 @@ - -year,make,model,comment,blank -"2012","Tesla","S","No comment", - -1997,Ford,E350,"Go get one now they are going fast", -2015,Chevy,Volt - diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index fb1bedfaa32c..d9e5d7af1967 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -53,7 +53,6 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te private val carsEmptyValueFile = "test-data/cars-empty-value.csv" private val carsBlankColName = "test-data/cars-blank-column-name.csv" private val carsCrlf = "test-data/cars-crlf.csv" - private val carsFilteredOutFile = "test-data/_cars.csv" private val emptyFile = "test-data/empty.csv" private val commentsFile = "test-data/comments.csv" private val disableCommentsFile = "test-data/disable_comments.csv" @@ -347,25 +346,6 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te assert(result.schema.fieldNames.size === 1) } - test("SPARK-26339 Not throw an exception if some of specified paths are filtered in") { - val cars = spark - .read - .option("header", "false") - .csv(testFile(carsFile), testFile(carsFilteredOutFile)) - - verifyCars(cars, withHeader = false, checkTypes = false) - } - - test("SPARK-26339 Throw an exception only if all of the specified paths are filtered out") { - val e = intercept[AnalysisException] { - val cars = spark - .read - .option("header", "false") - .csv(testFile(carsFilteredOutFile)) - 
}.getMessage - assert(e.contains("All paths were ignored:")) - } - test("DDL test with empty file") { withView("carsTable") { spark.sql( From 5f0ddd2d6e2fdebf549207bbc4b13ca709eee3c4 Mon Sep 17 00:00:00 2001 From: Thomas D'Silva Date: Tue, 1 Jan 2019 14:11:14 +0800 Subject: [PATCH 0208/1072] [SPARK-26499][SQL] JdbcUtils.makeGetter does not handle ByteType MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …Type ## What changes were proposed in this pull request? Modifed JdbcUtils.makeGetter to handle ByteType. ## How was this patch tested? Added a new test to JDBCSuite that maps ```TINYINT``` to ```ByteType```. Closes #23400 from twdsilva/tiny_int_support. Authored-by: Thomas D'Silva Signed-off-by: Hyukjin Kwon --- .../datasources/jdbc/JdbcUtils.scala | 4 +++ .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 25 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index edea549748b4..922bef284c98 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -438,6 +438,10 @@ object JdbcUtils extends Logging { (rs: ResultSet, row: InternalRow, pos: Int) => row.setShort(pos, rs.getShort(pos + 1)) + case ByteType => + (rs: ResultSet, row: InternalRow, pos: Int) => + row.update(pos, rs.getByte(pos + 1)) + case StringType => (rs: ResultSet, row: InternalRow, pos: Int) => // TODO(davies): use getBytes for better performance, if the encoding is UTF-8 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 71e83767964a..e4641631e607 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -56,6 +56,20 @@ class JDBCSuite extends QueryTest Some(StringType) } + val testH2DialectTinyInt = new JdbcDialect { + override def canHandle(url: String): Boolean = url.startsWith("jdbc:h2") + override def getCatalystType( + sqlType: Int, + typeName: String, + size: Int, + md: MetadataBuilder): Option[DataType] = { + sqlType match { + case java.sql.Types.TINYINT => Some(ByteType) + case _ => None + } + } + } + before { Utils.classForName("org.h2.Driver") // Extra properties that will be specified for our database. 
We need these to test @@ -693,6 +707,17 @@ class JDBCSuite extends QueryTest JdbcDialects.unregisterDialect(testH2Dialect) } + test("Map TINYINT to ByteType via JdbcDialects") { + JdbcDialects.registerDialect(testH2DialectTinyInt) + val df = spark.read.jdbc(urlWithUserAndPass, "test.inttypes", new Properties()) + val rows = df.collect() + assert(rows.length === 2) + assert(rows(0).get(2).isInstanceOf[Byte]) + assert(rows(0).getByte(2) === 3) + assert(rows(1).isNullAt(2)) + JdbcDialects.unregisterDialect(testH2DialectTinyInt) + } + test("Default jdbc dialect registration") { assert(JdbcDialects.get("jdbc:mysql://127.0.0.1/db") == MySQLDialect) assert(JdbcDialects.get("jdbc:postgresql://127.0.0.1/db") == PostgresDialect) From 2bf4d97118c20812e26a7ea59826ee470ab42f7c Mon Sep 17 00:00:00 2001 From: zhoukang Date: Tue, 1 Jan 2019 09:13:13 -0600 Subject: [PATCH 0209/1072] [SPARK-24544][SQL] Print actual failure cause when look up function failed ## What changes were proposed in this pull request? When we operate as below: ` 0: jdbc:hive2://xxx/> create function funnel_analysis as 'com.xxx.hive.extend.udf.UapFunnelAnalysis'; ` ` 0: jdbc:hive2://xxx/> select funnel_analysis(1,",",1,''); Error: org.apache.spark.sql.AnalysisException: Undefined function: 'funnel_analysis'. This function is neither a registered temporary function nor a permanent function registered in the database 'xxx'.; line 1 pos 7 (state=,code=0) ` ` 0: jdbc:hive2://xxx/> describe function funnel_analysis; +-----------------------------------------------------------+--+ | function_desc | +-----------------------------------------------------------+--+ | Function: xxx.funnel_analysis | | Class: com.xxx.hive.extend.udf.UapFunnelAnalysis | | Usage: N/A. | +-----------------------------------------------------------+--+ ` We can see that describe function returns the right information, but when we actually use this function, we get an 'undefined function' exception, which is really misleading; the real cause is below: ` No handler for Hive UDF 'com.xxx.xxx.hive.extend.udf.UapFunnelAnalysis': java.lang.IllegalStateException: Should not be called directly; at org.apache.hadoop.hive.ql.udf.generic.GenericUDTF.initialize(GenericUDTF.java:72) at org.apache.spark.sql.hive.HiveGenericUDTF.outputInspector$lzycompute(hiveUDFs.scala:204) at org.apache.spark.sql.hive.HiveGenericUDTF.outputInspector(hiveUDFs.scala:204) at org.apache.spark.sql.hive.HiveGenericUDTF.elementSchema$lzycompute(hiveUDFs.scala:212) at org.apache.spark.sql.hive.HiveGenericUDTF.elementSchema(hiveUDFs.scala:212) ` This patch prints the actual failure cause for quick debugging. ## How was this patch tested? UT Closes #21790 from caneGuy/zhoukang/print-warning1.
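To make the effect concrete, here is a minimal sketch (not part of the patch itself) of how the attached root cause can be inspected after this change; `spark` is an existing SparkSession and `funnel_analysis` is the hypothetical, unresolvable UDF from the example above:

```scala
import org.apache.spark.sql.AnalysisException

// Sketch only: analysis of the query fails because the function cannot be resolved.
try {
  spark.sql("SELECT funnel_analysis(1, ',', 1, '')")
} catch {
  case e: AnalysisException =>
    // With this patch, NoSuchFunctionException carries the original failure as its cause,
    // so the root problem (e.g. the UDF's IllegalStateException) is no longer hidden.
    e.cause.foreach(root => println(root.getMessage))
}
```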
Authored-by: zhoukang Signed-off-by: Sean Owen --- .../catalyst/analysis/NoSuchItemException.scala | 4 ++-- .../sql/catalyst/catalog/SessionCatalog.scala | 5 +++-- .../sql/catalyst/catalog/SessionCatalogSuite.scala | 14 ++++++++++++++ .../apache/spark/sql/hive/HiveSessionCatalog.scala | 9 ++++++--- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala index f5aae60431c1..8bf6f69f3b17 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala @@ -40,10 +40,10 @@ class NoSuchPartitionException( class NoSuchPermanentFunctionException(db: String, func: String) extends AnalysisException(s"Function '$func' not found in database '$db'") -class NoSuchFunctionException(db: String, func: String) +class NoSuchFunctionException(db: String, func: String, cause: Option[Throwable] = None) extends AnalysisException( s"Undefined function: '$func'. This function is neither a registered temporary function nor " + - s"a permanent function registered in the database '$db'.") + s"a permanent function registered in the database '$db'.", cause = cause) class NoSuchPartitionsException(db: String, table: String, specs: Seq[TablePartitionSpec]) extends AnalysisException( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index b6771ec4dffe..1dbe946503e5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1222,9 +1222,10 @@ class SessionCatalog( databaseExists(db) && externalCatalog.functionExists(db, name.funcName) } - protected def failFunctionLookup(name: FunctionIdentifier): Nothing = { + protected[sql] def failFunctionLookup( + name: FunctionIdentifier, cause: Option[Throwable] = None): Nothing = { throw new NoSuchFunctionException( - db = name.database.getOrElse(getCurrentDatabase), func = name.funcName) + db = name.database.getOrElse(getCurrentDatabase), func = name.funcName, cause) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index 19e8c0334689..92f87ea796e8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -1448,4 +1448,18 @@ abstract class SessionCatalogSuite extends AnalysisTest { } } } + + test("SPARK-24544: test print actual failure cause when look up function failed") { + withBasicCatalog { catalog => + val cause = intercept[NoSuchFunctionException] { + catalog.failFunctionLookup(FunctionIdentifier("failureFunc"), + Some(new Exception("Actual error"))) + } + + // fullStackTrace will be printed, but `cause.getMessage` has been + // override in `AnalysisException`,so here we get the root cause + // exception message for check. 
+ assert(cause.cause.get.getMessage.contains("Actual error")) + } + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 405c0c8bfe66..7560805bb3b0 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DecimalType, DoubleType} +import org.apache.spark.util.Utils private[sql] class HiveSessionCatalog( @@ -141,8 +142,10 @@ private[sql] class HiveSessionCatalog( // let's try to load it as a Hive's built-in function. // Hive is case insensitive. val functionName = funcName.unquotedString.toLowerCase(Locale.ROOT) + logWarning("Encountered a failure during looking up function:" + + s" ${Utils.exceptionString(error)}") if (!hiveFunctions.contains(functionName)) { - failFunctionLookup(funcName) + failFunctionLookup(funcName, Some(error)) } // TODO: Remove this fallback path once we implement the list of fallback functions @@ -150,12 +153,12 @@ private[sql] class HiveSessionCatalog( val functionInfo = { try { Option(HiveFunctionRegistry.getFunctionInfo(functionName)).getOrElse( - failFunctionLookup(funcName)) + failFunctionLookup(funcName, Some(error))) } catch { // If HiveFunctionRegistry.getFunctionInfo throws an exception, // we are failing to load a Hive builtin function, which means that // the given function is not a Hive builtin function. - case NonFatal(e) => failFunctionLookup(funcName) + case NonFatal(e) => failFunctionLookup(funcName, Some(e)) } } val className = functionInfo.getFunctionClass.getName From 001d3095385626e329b3853364a4feeb811aac5a Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Tue, 1 Jan 2019 09:18:58 -0600 Subject: [PATCH 0210/1072] [SPARK-25765][ML] Add training cost to BisectingKMeans summary ## What changes were proposed in this pull request? The PR adds the `trainingCost` value to the `BisectingKMeansSummary`, in order to expose the information retrievable by running `computeCost` on the training dataset. This fills the gap with `KMeans` implementation. ## How was this patch tested? improved UTs Closes #22764 from mgaido91/SPARK-25765. 
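To make the new field concrete, here is a minimal usage sketch (not part of the patch itself), assuming `dataset` is a DataFrame with a "features" vector column:

```scala
import org.apache.spark.ml.clustering.BisectingKMeans

// Sketch only: fit a model on the assumed `dataset` and read the summary.
val model = new BisectingKMeans().setK(2).setSeed(1L).fit(dataset)
val summary = model.summary

// New in this patch: the cost on the training data is exposed directly,
// matching what model.computeCost(dataset) returns for the same data.
println(summary.trainingCost)
```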
Authored-by: Marco Gaido Signed-off-by: Sean Owen --- .../spark/ml/clustering/BisectingKMeans.scala | 13 +++- .../mllib/clustering/BisectingKMeans.scala | 3 +- .../clustering/BisectingKMeansModel.scala | 59 ++++++++++++++++--- .../ml/clustering/BisectingKMeansSuite.scala | 2 + .../clustering/BisectingKMeansSuite.scala | 1 + project/MimaExcludes.scala | 3 + python/pyspark/ml/clustering.py | 12 +++- 7 files changed, 82 insertions(+), 11 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index 49e9f5136813..d846f17e7f54 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -264,7 +264,12 @@ class BisectingKMeans @Since("2.0.0") ( val parentModel = bkm.run(rdd, Some(instr)) val model = copyValues(new BisectingKMeansModel(uid, parentModel).setParent(this)) val summary = new BisectingKMeansSummary( - model.transform(dataset), $(predictionCol), $(featuresCol), $(k), $(maxIter)) + model.transform(dataset), + $(predictionCol), + $(featuresCol), + $(k), + $(maxIter), + parentModel.trainingCost) instr.logNamedValue("clusterSizes", summary.clusterSizes) instr.logNumFeatures(model.clusterCenters.head.size) model.setSummary(Some(summary)) @@ -294,6 +299,8 @@ object BisectingKMeans extends DefaultParamsReadable[BisectingKMeans] { * @param featuresCol Name for column of features in `predictions`. * @param k Number of clusters. * @param numIter Number of iterations. + * @param trainingCost Sum of the cost to the nearest centroid for all points in the training + * dataset. This is equivalent to sklearn's inertia. */ @Since("2.1.0") @Experimental @@ -302,4 +309,6 @@ class BisectingKMeansSummary private[clustering] ( predictionCol: String, featuresCol: String, k: Int, - numIter: Int) extends ClusteringSummary(predictions, predictionCol, featuresCol, k, numIter) + numIter: Int, + @Since("3.0.0") val trainingCost: Double) + extends ClusteringSummary(predictions, predictionCol, featuresCol, k, numIter) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala index 80ab8eb9bc8b..696dff0f319a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala @@ -242,7 +242,8 @@ class BisectingKMeans private ( norms.unpersist(false) val clusters = activeClusters ++ inactiveClusters val root = buildTree(clusters, dMeasure) - new BisectingKMeansModel(root, this.distanceMeasure) + val totalCost = root.leafNodes.map(_.cost).sum + new BisectingKMeansModel(root, this.distanceMeasure, totalCost) } /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala index 4c5794fbffc8..b54b8917e060 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala @@ -41,11 +41,12 @@ import org.apache.spark.sql.{Row, SparkSession} @Since("1.6.0") class BisectingKMeansModel private[clustering] ( private[clustering] val root: ClusteringTreeNode, - @Since("2.4.0") val distanceMeasure: String + @Since("2.4.0") val distanceMeasure: String, + 
@Since("3.0.0") val trainingCost: Double ) extends Serializable with Saveable with Logging { @Since("1.6.0") - def this(root: ClusteringTreeNode) = this(root, DistanceMeasure.EUCLIDEAN) + def this(root: ClusteringTreeNode) = this(root, DistanceMeasure.EUCLIDEAN, 0.0) private val distanceMeasureInstance: DistanceMeasure = DistanceMeasure.decodeFromString(distanceMeasure) @@ -109,10 +110,10 @@ class BisectingKMeansModel private[clustering] ( @Since("2.0.0") override def save(sc: SparkContext, path: String): Unit = { - BisectingKMeansModel.SaveLoadV2_0.save(sc, this, path) + BisectingKMeansModel.SaveLoadV3_0.save(sc, this, path) } - override protected def formatVersion: String = "2.0" + override protected def formatVersion: String = "3.0" } @Since("2.0.0") @@ -128,11 +129,15 @@ object BisectingKMeansModel extends Loader[BisectingKMeansModel] { case (SaveLoadV2_0.thisClassName, SaveLoadV2_0.thisFormatVersion) => val model = SaveLoadV2_0.load(sc, path) model + case (SaveLoadV3_0.thisClassName, SaveLoadV3_0.thisFormatVersion) => + val model = SaveLoadV3_0.load(sc, path) + model case _ => throw new Exception( s"BisectingKMeansModel.load did not recognize model with (className, format version):" + s"($loadedClassName, $formatVersion). Supported:\n" + s" (${SaveLoadV1_0.thisClassName}, ${SaveLoadV1_0.thisClassName}\n" + - s" (${SaveLoadV2_0.thisClassName}, ${SaveLoadV2_0.thisClassName})") + s" (${SaveLoadV2_0.thisClassName}, ${SaveLoadV2_0.thisClassName})\n" + + s" (${SaveLoadV3_0.thisClassName}, ${SaveLoadV3_0.thisClassName})") } } @@ -195,7 +200,8 @@ object BisectingKMeansModel extends Loader[BisectingKMeansModel] { val data = rows.select("index", "size", "center", "norm", "cost", "height", "children") val nodes = data.rdd.map(Data.apply).collect().map(d => (d.index, d)).toMap val rootNode = buildTree(rootId, nodes) - new BisectingKMeansModel(rootNode, DistanceMeasure.EUCLIDEAN) + val totalCost = rootNode.leafNodes.map(_.cost).sum + new BisectingKMeansModel(rootNode, DistanceMeasure.EUCLIDEAN, totalCost) } } @@ -231,7 +237,46 @@ object BisectingKMeansModel extends Loader[BisectingKMeansModel] { val data = rows.select("index", "size", "center", "norm", "cost", "height", "children") val nodes = data.rdd.map(Data.apply).collect().map(d => (d.index, d)).toMap val rootNode = buildTree(rootId, nodes) - new BisectingKMeansModel(rootNode, distanceMeasure) + val totalCost = rootNode.leafNodes.map(_.cost).sum + new BisectingKMeansModel(rootNode, distanceMeasure, totalCost) + } + } + + private[clustering] object SaveLoadV3_0 { + private[clustering] val thisFormatVersion = "3.0" + + private[clustering] + val thisClassName = "org.apache.spark.mllib.clustering.BisectingKMeansModel" + + def save(sc: SparkContext, model: BisectingKMeansModel, path: String): Unit = { + val spark = SparkSession.builder().sparkContext(sc).getOrCreate() + val metadata = compact(render( + ("class" -> thisClassName) ~ ("version" -> thisFormatVersion) + ~ ("rootId" -> model.root.index) ~ ("distanceMeasure" -> model.distanceMeasure) + ~ ("trainingCost" -> model.trainingCost))) + sc.parallelize(Seq(metadata), 1).saveAsTextFile(Loader.metadataPath(path)) + + val data = getNodes(model.root).map(node => Data(node.index, node.size, + node.centerWithNorm.vector, node.centerWithNorm.norm, node.cost, node.height, + node.children.map(_.index))) + spark.createDataFrame(data).write.parquet(Loader.dataPath(path)) + } + + def load(sc: SparkContext, path: String): BisectingKMeansModel = { + implicit val formats: DefaultFormats = DefaultFormats + val 
(className, formatVersion, metadata) = Loader.loadMetadata(sc, path) + assert(className == thisClassName) + assert(formatVersion == thisFormatVersion) + val rootId = (metadata \ "rootId").extract[Int] + val distanceMeasure = (metadata \ "distanceMeasure").extract[String] + val trainingCost = (metadata \ "trainingCost").extract[Double] + val spark = SparkSession.builder().sparkContext(sc).getOrCreate() + val rows = spark.read.parquet(Loader.dataPath(path)) + Loader.checkSchema[Data](rows.schema) + val data = rows.select("index", "size", "center", "norm", "cost", "height", "children") + val nodes = data.rdd.map(Data.apply).collect().map(d => (d.index, d)).toMap + val rootNode = buildTree(rootId, nodes) + new BisectingKMeansModel(rootNode, distanceMeasure, trainingCost) } } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala index 1b7780e171e7..461f8b8d211d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala @@ -134,6 +134,8 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest { assert(clusterSizes.sum === numRows) assert(clusterSizes.forall(_ >= 0)) assert(summary.numIter == 20) + assert(summary.trainingCost < 0.1) + assert(model.computeCost(dataset) == summary.trainingCost) model.setSummary(None) assert(!model.hasSummary) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala index 4a4d8b5c89de..10d5f325d68e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala @@ -194,6 +194,7 @@ class BisectingKMeansSuite extends SparkFunSuite with MLlibTestSparkContext { assert(model.k === sameModel.k) assert(model.distanceMeasure === sameModel.distanceMeasure) model.clusterCenters.zip(sameModel.clusterCenters).foreach(c => c._1 === c._2) + assert(model.trainingCost == sameModel.trainingCost) } finally { Utils.deleteRecursively(tempDir) } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 89fc53ce3972..cf8d9f3c24d0 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -36,6 +36,9 @@ object MimaExcludes { // Exclude rules for 3.0.x lazy val v30excludes = v24excludes ++ Seq( + // [SPARK-25765][ML] Add training cost to BisectingKMeans summary + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.BisectingKMeansModel.this"), + // [SPARK-24243][CORE] Expose exceptions from InProcessAppHandle ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.launcher.SparkAppHandle.getError"), diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index d8a6dfb7d3a7..5a776aec1425 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -587,6 +587,8 @@ class BisectingKMeans(JavaEstimator, HasDistanceMeasure, HasFeaturesCol, HasPred 2 >>> summary.clusterSizes [2, 2] + >>> summary.trainingCost + 2.000... >>> transformed = model.transform(df).select("features", "prediction") >>> rows = transformed.collect() >>> rows[0].prediction == rows[1].prediction @@ -700,7 +702,15 @@ class BisectingKMeansSummary(ClusteringSummary): .. 
versionadded:: 2.1.0 """ - pass + + @property + @since("3.0.0") + def trainingCost(self): + """ + Sum of squared distances to the nearest centroid for all points in the training dataset. + This is equivalent to sklearn's inertia. + """ + return self._call_java("trainingCost") @inherit_doc From 5da55873fa330f4ab21fb05a0a7dbf45bbeb5a54 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 2 Jan 2019 07:59:32 +0800 Subject: [PATCH 0211/1072] [SPARK-26374][TEST][SQL] Enable TimestampFormatter in HadoopFsRelationTest ## What changes were proposed in this pull request? Default timestamp pattern defined in `JSONOptions` doesn't allow saving/loading timestamps with time zones of seconds precision. Because of that, the round trip test failed for timestamps before 1582. In the PR, I propose to extend zone offset section from `XXX` to `XXXXX` which should allow to save/load zone offsets like `-07:52:48`. ## How was this patch tested? It was tested by `JsonHadoopFsRelationSuite` and `TimestampFormatterSuite`. Closes #23417 from MaxGekk/hadoopfsrelationtest-new-formatter. Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Hyukjin Kwon --- .../sql/util/TimestampFormatterSuite.scala | 32 ++++++++++--------- .../sql/sources/HadoopFsRelationTest.scala | 6 ++-- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala index edccbb2a7f5d..2ce3eacc30cc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala @@ -70,21 +70,23 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper { } test("roundtrip micros -> timestamp -> micros using timezones") { - Seq( - -58710115316212000L, - -18926315945345679L, - -9463427405253013L, - -244000001L, - 0L, - 99628200102030L, - 1543749753123456L, - 2177456523456789L, - 11858049903010203L).foreach { micros => - DateTimeTestUtils.outstandingTimezones.foreach { timeZone => - val formatter = TimestampFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSSSS", timeZone, Locale.US) - val timestamp = formatter.format(micros) - val parsed = formatter.parse(timestamp) - assert(micros === parsed) + Seq("yyyy-MM-dd'T'HH:mm:ss.SSSSSS", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXXXX").foreach { pattern => + Seq( + -58710115316212000L, + -18926315945345679L, + -9463427405253013L, + -244000001L, + 0L, + 99628200102030L, + 1543749753123456L, + 2177456523456789L, + 11858049903010203L).foreach { micros => + DateTimeTestUtils.outstandingTimezones.foreach { timeZone => + val formatter = TimestampFormatter(pattern, timeZone, Locale.US) + val timestamp = formatter.format(micros) + val parsed = formatter.parse(timestamp) + assert(micros === parsed) + } } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala index f0f62b608785..57b896612bfe 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala @@ -126,8 +126,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes } else { Seq(false) } - // TODO: Support new parser too, see SPARK-26374. 
- withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> "true") { + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> "false") { for (dataType <- supportedDataTypes) { for (parquetDictionaryEncodingEnabled <- parquetDictionaryEncodingEnabledConfs) { val extraMessage = if (isParquetDataSource) { @@ -138,7 +137,8 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes logInfo(s"Testing $dataType data type$extraMessage") val extraOptions = Map[String, String]( - "parquet.enable.dictionary" -> parquetDictionaryEncodingEnabled.toString + "parquet.enable.dictionary" -> parquetDictionaryEncodingEnabled.toString, + "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss.SSSXXXXX" ) withTempPath { file => From 39a0493387d66a1e5c04f568804ebc83c2a5f644 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 2 Jan 2019 08:01:34 +0800 Subject: [PATCH 0212/1072] [SPARK-26227][R] from_[csv|json] should accept schema_of_[csv|json] in R API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? **1. Document `from_csv(..., schema_of_csv(...))` support:** ```R csv <- "Amsterdam,2018" df <- sql(paste0("SELECT '", csv, "' as csv")) head(select(df, from_csv(df$csv, schema_of_csv(csv)))) ``` ``` from_csv(csv) 1 Amsterdam, 2018 ``` **2. Allow `from_json(..., schema_of_json(...))`** Before: ```R df2 <- sql("SELECT named_struct('name', 'Bob') as people") df2 <- mutate(df2, people_json = to_json(df2$people)) head(select(df2, from_json(df2$people_json, schema_of_json(head(df2)$people_json)))) ``` ``` Error in (function (classes, fdef, mtable) : unable to find an inherited method for function ‘from_json’ for signature ‘"Column", "Column"’ ``` After: ```R df2 <- sql("SELECT named_struct('name', 'Bob') as people") df2 <- mutate(df2, people_json = to_json(df2$people)) head(select(df2, from_json(df2$people_json, schema_of_json(head(df2)$people_json)))) ``` ``` from_json(people_json) 1 Bob ``` **3. (While I'm here) Allow `structType` as schema for `from_csv` support to match with `from_json`.** Before: ```R csv <- "Amsterdam,2018" df <- sql(paste0("SELECT '", csv, "' as csv")) head(select(df, from_csv(df$csv, structType("city STRING, year INT")))) ``` ``` Error in (function (classes, fdef, mtable) : unable to find an inherited method for function ‘from_csv’ for signature ‘"Column", "structType"’ ``` After: ```R csv <- "Amsterdam,2018" df <- sql(paste0("SELECT '", csv, "' as csv")) head(select(df, from_csv(df$csv, structType("city STRING, year INT")))) ``` ``` from_csv(csv) 1 Amsterdam, 2018 ``` ## How was this patch tested? Manually tested and unittests were added. Closes #23184 from HyukjinKwon/SPARK-26227-1. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- R/pkg/R/functions.R | 60 ++++++++++++------- R/pkg/tests/fulltests/test_sparkSQL.R | 16 ++++- .../org/apache/spark/sql/api/r/SQLUtils.scala | 6 +- 3 files changed, 59 insertions(+), 23 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 5b3cc0940d9c..58fc4104b0f0 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -202,8 +202,9 @@ NULL #' \itemize{ #' \item \code{from_json}: a structType object to use as the schema to use #' when parsing the JSON string. Since Spark 2.3, the DDL-formatted string is -#' also supported for the schema. -#' \item \code{from_csv}: a DDL-formatted string +#' also supported for the schema. Since Spark 3.0, \code{schema_of_json} or +#' the DDL-formatted string literal can also be accepted. 
+#' \item \code{from_csv}: a structType object, DDL-formatted string or \code{schema_of_csv} #' } #' @param ... additional argument(s). #' \itemize{ @@ -2254,6 +2255,8 @@ setMethod("date_format", signature(y = "Column", x = "character"), column(jc) }) +setClassUnion("characterOrstructTypeOrColumn", c("character", "structType", "Column")) + #' @details #' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType} #' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set @@ -2261,7 +2264,7 @@ setMethod("date_format", signature(y = "Column", x = "character"), #' #' @rdname column_collection_functions #' @param as.json.array indicating if input string is JSON array of objects or a single object. -#' @aliases from_json from_json,Column,characterOrstructType-method +#' @aliases from_json from_json,Column,characterOrstructTypeOrColumn-method #' @examples #' #' \dontrun{ @@ -2269,25 +2272,37 @@ setMethod("date_format", signature(y = "Column", x = "character"), #' df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy')) #' schema <- structType(structField("date", "string")) #' head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy'))) - #' df2 <- sql("SELECT named_struct('name', 'Bob') as people") #' df2 <- mutate(df2, people_json = to_json(df2$people)) #' schema <- structType(structField("name", "string")) #' head(select(df2, from_json(df2$people_json, schema))) -#' head(select(df2, from_json(df2$people_json, "name STRING")))} +#' head(select(df2, from_json(df2$people_json, "name STRING"))) +#' head(select(df2, from_json(df2$people_json, schema_of_json(head(df2)$people_json))))} #' @note from_json since 2.2.0 -setMethod("from_json", signature(x = "Column", schema = "characterOrstructType"), +setMethod("from_json", signature(x = "Column", schema = "characterOrstructTypeOrColumn"), function(x, schema, as.json.array = FALSE, ...) { if (is.character(schema)) { - schema <- structType(schema) + jschema <- structType(schema)$jobj + } else if (class(schema) == "structType") { + jschema <- schema$jobj + } else { + jschema <- schema@jc } if (as.json.array) { - jschema <- callJStatic("org.apache.spark.sql.types.DataTypes", - "createArrayType", - schema$jobj) - } else { - jschema <- schema$jobj + # This case is R-specifically different. Unlike Scala and Python side, + # R side has 'as.json.array' option to indicate if the schema should be + # treated as struct or element type of array in order to make it more + # R-friendly. + if (class(schema) == "Column") { + jschema <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", + "createArrayType", + jschema) + } else { + jschema <- callJStatic("org.apache.spark.sql.types.DataTypes", + "createArrayType", + jschema) + } } options <- varargsToStrEnv(...) jc <- callJStatic("org.apache.spark.sql.functions", @@ -2328,22 +2343,27 @@ setMethod("schema_of_json", signature(x = "characterOrColumn"), #' If the string is unparseable, the Column will contain the value NA. 
#' #' @rdname column_collection_functions -#' @aliases from_csv from_csv,Column,character-method +#' @aliases from_csv from_csv,Column,characterOrstructTypeOrColumn-method #' @examples #' #' \dontrun{ -#' df <- sql("SELECT 'Amsterdam,2018' as csv") +#' csv <- "Amsterdam,2018" +#' df <- sql(paste0("SELECT '", csv, "' as csv")) #' schema <- "city STRING, year INT" -#' head(select(df, from_csv(df$csv, schema)))} +#' head(select(df, from_csv(df$csv, schema))) +#' head(select(df, from_csv(df$csv, structType(schema)))) +#' head(select(df, from_csv(df$csv, schema_of_csv(csv))))} #' @note from_csv since 3.0.0 -setMethod("from_csv", signature(x = "Column", schema = "characterOrColumn"), +setMethod("from_csv", signature(x = "Column", schema = "characterOrstructTypeOrColumn"), function(x, schema, ...) { - if (class(schema) == "Column") { - jschema <- schema@jc - } else if (is.character(schema)) { + if (class(schema) == "structType") { + schema <- callJMethod(schema$jobj, "toDDL") + } + + if (is.character(schema)) { jschema <- callJStatic("org.apache.spark.sql.functions", "lit", schema) } else { - stop("schema argument should be a column or character") + jschema <- schema@jc } options <- varargsToStrEnv(...) jc <- callJStatic("org.apache.spark.sql.functions", diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 0d5118c127f2..a1805f57b1dc 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1626,6 +1626,12 @@ test_that("column functions", { expect_equal(c[[1]][[1]]$a, 1) c <- collect(select(df, alias(from_csv(df$col, lit("a INT")), "csv"))) expect_equal(c[[1]][[1]]$a, 1) + c <- collect(select(df, alias(from_csv(df$col, structType("a INT")), "csv"))) + expect_equal(c[[1]][[1]]$a, 1) + c <- collect(select(df, alias(from_csv(df$col, schema_of_csv("1")), "csv"))) + expect_equal(c[[1]][[1]]$`_c0`, 1) + c <- collect(select(df, alias(from_csv(df$col, schema_of_csv(lit("1"))), "csv"))) + expect_equal(c[[1]][[1]]$`_c0`, 1) df <- as.DataFrame(list(list("col" = "1"))) c <- collect(select(df, schema_of_csv("Amsterdam,2018"))) @@ -1651,7 +1657,9 @@ test_that("column functions", { expect_equal(j[order(j$json), ][1], "{\"age\":16,\"height\":176.5}") df <- as.DataFrame(j) schemas <- list(structType(structField("age", "integer"), structField("height", "double")), - "age INT, height DOUBLE") + "age INT, height DOUBLE", + schema_of_json("{\"age\":16,\"height\":176.5}"), + schema_of_json(lit("{\"age\":16,\"height\":176.5}"))) for (schema in schemas) { s <- collect(select(df, alias(from_json(df$json, schema), "structcol"))) expect_equal(ncol(s), 1) @@ -1691,7 +1699,11 @@ test_that("column functions", { # check if array type in string is correctly supported. 
jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]" df <- as.DataFrame(list(list("people" = jsonArr))) - for (schema in list(structType(structField("name", "string")), "name STRING")) { + schemas <- list(structType(structField("name", "string")), + "name STRING", + schema_of_json("{\"name\":\"Alice\"}"), + schema_of_json(lit("{\"name\":\"Bob\"}"))) + for (schema in schemas) { arr <- collect(select(df, alias(from_json(df$people, schema, as.json.array = TRUE), "arrcol"))) expect_equal(ncol(arr), 1) expect_equal(nrow(arr), 1) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index e98cab8b56d1..f5d8d4ea0a4d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -30,7 +30,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericRowWithSchema} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.command.ShowTablesCommand import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION @@ -233,4 +233,8 @@ private[sql] object SQLUtils extends Logging { } sparkSession.sessionState.catalog.listTables(db).map(_.table).toArray } + + def createArrayType(column: Column): ArrayType = { + new ArrayType(ExprUtils.evalTypeExpr(column.expr), true) + } } From d371180c01bf68ed4e5f88df836c7f2fb27a46d3 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 2 Jan 2019 08:04:36 +0800 Subject: [PATCH 0213/1072] [MINOR][R] Deduplicate RStudio setup documentation ## What changes were proposed in this pull request? This PR targets to deduplicate RStudio setup for SparkR. ## How was this patch tested? N/A Closes #23421 from HyukjinKwon/minor-doc. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- R/README.md | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/R/README.md b/R/README.md index d77a1ecffc99..e238a0efe4b5 100644 --- a/R/README.md +++ b/R/README.md @@ -39,15 +39,7 @@ To set other options like driver memory, executor memory etc. you can pass in th #### Using SparkR from RStudio -If you wish to use SparkR from RStudio or other R frontends you will need to set some environment variables which point SparkR to your Spark installation. For example -```R -# Set this to where Spark is installed -Sys.setenv(SPARK_HOME="/Users/username/spark") -# This line loads SparkR from the installed directory -.libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths())) -library(SparkR) -sparkR.session() -``` +If you wish to use SparkR from RStudio, please refer [SparkR documentation](https://spark.apache.org/docs/latest/sparkr.html#starting-up-from-rstudio). #### Making changes to SparkR From 79b05481a2cff7a0aa34146c72068cc6e41e2241 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Tue, 1 Jan 2019 22:37:28 -0600 Subject: [PATCH 0214/1072] [SPARK-26508][CORE][SQL] Address warning messages in Java reported at lgtm.com ## What changes were proposed in this pull request? This PR addresses warning messages in Java files reported at [lgtm.com](https://lgtm.com). [lgtm.com](https://lgtm.com) provides automated code review of Java/Python/JavaScript files for OSS projects. 
[Here](https://lgtm.com/projects/g/apache/spark/alerts/?mode=list&severity=warning) are warning messages regarding Apache Spark project. This PR addresses the following warnings: - Result of multiplication cast to wider type - Implicit narrowing conversion in compound assignment - Boxed variable is never null - Useless null check NOTE: `Potential input resource leak` looks false positive for now. ## How was this patch tested? Existing UTs Closes #23420 from kiszk/SPARK-26508. Authored-by: Kazuaki Ishizaki Signed-off-by: Sean Owen --- .../java/org/apache/spark/network/util/ByteUnit.java | 12 ++++++------ .../org/apache/spark/network/util/TransportConf.java | 6 +++--- .../org/apache/spark/unsafe/map/BytesToBytesMap.java | 4 ++-- .../main/java/org/apache/spark/examples/JavaTC.java | 2 +- .../org/apache/spark/examples/ml/JavaALSExample.java | 2 +- .../examples/mllib/JavaCorrelationsExample.java | 2 +- .../mllib/JavaRandomForestClassificationExample.java | 8 ++++---- .../org/apache/spark/launcher/LauncherServer.java | 7 +++---- .../expressions/codegen/UnsafeArrayWriter.java | 2 +- .../expressions/codegen/UnsafeRowWriter.java | 2 +- .../sql/catalyst/expressions/xml/UDFXPathUtil.java | 2 +- 11 files changed, 24 insertions(+), 25 deletions(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/ByteUnit.java b/common/network-common/src/main/java/org/apache/spark/network/util/ByteUnit.java index 984575acaf51..6f7925c26094 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/ByteUnit.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/ByteUnit.java @@ -18,11 +18,11 @@ public enum ByteUnit { BYTE(1), - KiB(1024L), - MiB((long) Math.pow(1024L, 2L)), - GiB((long) Math.pow(1024L, 3L)), - TiB((long) Math.pow(1024L, 4L)), - PiB((long) Math.pow(1024L, 5L)); + KiB(1L << 10), + MiB(1L << 20), + GiB(1L << 30), + TiB(1L << 40), + PiB(1L << 50); ByteUnit(long multiplier) { this.multiplier = multiplier; @@ -50,7 +50,7 @@ public long convertTo(long d, ByteUnit u) { } } - public double toBytes(long d) { + public long toBytes(long d) { if (d < 0) { throw new IllegalArgumentException("Negative size value. Size must be positive: " + d); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index 43a6bc7dc3d0..201628b04fbe 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -309,8 +309,8 @@ public int chunkFetchHandlerThreads() { } int chunkFetchHandlerThreadsPercent = conf.getInt("spark.shuffle.server.chunkFetchHandlerThreadsPercent", 100); - return (int)Math.ceil( - (this.serverThreads() > 0 ? this.serverThreads() : 2 * NettyRuntime.availableProcessors()) * - chunkFetchHandlerThreadsPercent/(double)100); + int threads = + this.serverThreads() > 0 ? 
this.serverThreads() : 2 * NettyRuntime.availableProcessors(); + return (int) Math.ceil(threads * (chunkFetchHandlerThreadsPercent / 100.0)); } } diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index 7df8aafb2b67..2ff98a69ee1f 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -712,7 +712,7 @@ public boolean append(Object kbase, long koff, int klen, Object vbase, long voff final long recordOffset = offset; UnsafeAlignedOffset.putSize(base, offset, klen + vlen + uaoSize); UnsafeAlignedOffset.putSize(base, offset + uaoSize, klen); - offset += (2 * uaoSize); + offset += (2L * uaoSize); Platform.copyMemory(kbase, koff, base, offset, klen); offset += klen; Platform.copyMemory(vbase, voff, base, offset, vlen); @@ -780,7 +780,7 @@ private void allocate(int capacity) { assert (capacity >= 0); capacity = Math.max((int) Math.min(MAX_CAPACITY, ByteArrayMethods.nextPowerOf2(capacity)), 64); assert (capacity <= MAX_CAPACITY); - longArray = allocateArray(capacity * 2); + longArray = allocateArray(capacity * 2L); longArray.zeroOut(); this.growthThreshold = (int) (capacity * loadFactor); diff --git a/examples/src/main/java/org/apache/spark/examples/JavaTC.java b/examples/src/main/java/org/apache/spark/examples/JavaTC.java index c9ca9c9b3a41..7e8df69e7e8d 100644 --- a/examples/src/main/java/org/apache/spark/examples/JavaTC.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaTC.java @@ -71,7 +71,7 @@ public static void main(String[] args) { JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); - Integer slices = (args.length > 0) ? Integer.parseInt(args[0]): 2; + int slices = (args.length > 0) ? Integer.parseInt(args[0]): 2; JavaPairRDD tc = jsc.parallelizePairs(generateGraph(), slices).cache(); // Linear transitive closure: each round grows paths by one edge, diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java index 27052be87b82..b8d2c9f6a658 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java @@ -111,7 +111,7 @@ public static void main(String[] args) { .setMetricName("rmse") .setLabelCol("rating") .setPredictionCol("prediction"); - Double rmse = evaluator.evaluate(predictions); + double rmse = evaluator.evaluate(predictions); System.out.println("Root-mean-square error = " + rmse); // Generate top 10 movie recommendations for each user diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java index c0fa0b3cac1e..9bd858b59890 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java @@ -46,7 +46,7 @@ public static void main(String[] args) { // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. // If a method is not specified, Pearson's method will be used by default. 
- Double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson"); + double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson"); System.out.println("Correlation is: " + correlation); // note that each Vector is a row and not a column diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestClassificationExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestClassificationExample.java index 6998ce2156c2..0707db8d3e83 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestClassificationExample.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestClassificationExample.java @@ -48,14 +48,14 @@ public static void main(String[] args) { // Train a RandomForest model. // Empty categoricalFeaturesInfo indicates all features are continuous. - Integer numClasses = 2; + int numClasses = 2; Map categoricalFeaturesInfo = new HashMap<>(); Integer numTrees = 3; // Use more in practice. String featureSubsetStrategy = "auto"; // Let the algorithm choose. String impurity = "gini"; - Integer maxDepth = 5; - Integer maxBins = 32; - Integer seed = 12345; + int maxDepth = 5; + int maxBins = 32; + int seed = 12345; RandomForestModel model = RandomForest.trainClassifier(trainingData, numClasses, categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, diff --git a/launcher/src/main/java/org/apache/spark/launcher/LauncherServer.java b/launcher/src/main/java/org/apache/spark/launcher/LauncherServer.java index 607879fd02ea..3ff77878f68a 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/LauncherServer.java +++ b/launcher/src/main/java/org/apache/spark/launcher/LauncherServer.java @@ -318,9 +318,9 @@ protected void handle(Message msg) throws IOException { throw new IllegalArgumentException("Received Hello for unknown client."); } } else { + String msgClassName = msg != null ? msg.getClass().getName() : "no message"; if (handle == null) { - throw new IllegalArgumentException("Expected hello, got: " + - msg != null ? msg.getClass().getName() : null); + throw new IllegalArgumentException("Expected hello, got: " + msgClassName); } if (msg instanceof SetAppId) { SetAppId set = (SetAppId) msg; @@ -328,8 +328,7 @@ protected void handle(Message msg) throws IOException { } else if (msg instanceof SetState) { handle.setState(((SetState)msg).state); } else { - throw new IllegalArgumentException("Invalid message: " + - msg != null ? 
msg.getClass().getName() : null); + throw new IllegalArgumentException("Invalid message: " + msgClassName); } } } catch (Exception e) { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeArrayWriter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeArrayWriter.java index a78dd970d23e..997eecd839d8 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeArrayWriter.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeArrayWriter.java @@ -74,7 +74,7 @@ public void initialize(int numElements) { } private long getElementOffset(int ordinal) { - return startingOffset + headerInBytes + ordinal * elementSize; + return startingOffset + headerInBytes + ordinal * (long) elementSize; } private void setNullBit(int ordinal) { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriter.java index 3960d6d52047..d2298aa26364 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriter.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriter.java @@ -132,7 +132,7 @@ public void setNull8Bytes(int ordinal) { } public long getFieldOffset(int ordinal) { - return startingOffset + nullBitsSize + 8 * ordinal; + return startingOffset + nullBitsSize + 8L * ordinal; } public void write(int ordinal, boolean value) { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/xml/UDFXPathUtil.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/xml/UDFXPathUtil.java index 023ec139652c..e9f18229b54c 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/xml/UDFXPathUtil.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/xml/UDFXPathUtil.java @@ -180,7 +180,7 @@ public long skip(long ns) throws IOException { return 0; } // Bound skip by beginning and end of the source - long n = Math.min(length - next, ns); + int n = (int) Math.min(length - next, ns); n = Math.max(-next, n); next += n; return n; From 4bdfda92a1c570d7a1142ee30eb41e37661bc240 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Wed, 2 Jan 2019 11:23:53 -0600 Subject: [PATCH 0215/1072] [SPARK-26507][CORE] Fix core tests for Java 11 ## What changes were proposed in this pull request? This should make tests in core modules pass for Java 11. ## How was this patch tested? Existing tests, with modifications. Closes #23419 from srowen/Java11. 
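To make one of the Java 11 incompatibilities concrete: `StackTraceElement.equals` gained class-loader and module checks in Java 9+, so round-tripped stack frames stop comparing equal even when nothing meaningful changed. A minimal sketch of the field-wise comparison that the `JsonProtocolSuite` change below adopts (the helper name `sameFrame` is illustrative, not part of the patch):
```scala
// Illustrative only: compare the fields that matter for the test, ignoring the
// class-loader/module information that Java 9+ folds into equals().
def sameFrame(a: StackTraceElement, b: StackTraceElement): Boolean = {
  a.getClassName == b.getClassName &&
    a.getMethodName == b.getMethodName &&
    a.getFileName == b.getFileName &&
    a.getLineNumber == b.getLineNumber
}
```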
Authored-by: Sean Owen Signed-off-by: Sean Owen --- .../main/scala/org/apache/spark/util/Utils.scala | 10 +++++++--- .../metrics/source/AccumulatorSourceSuite.scala | 9 ++++----- .../apache/spark/util/JsonProtocolSuite.scala | 16 +++++++++++----- .../scala/org/apache/spark/util/UtilsSuite.scala | 16 ---------------- .../launcher/SparkSubmitCommandBuilderSuite.java | 2 +- pom.xml | 2 ++ 6 files changed, 25 insertions(+), 30 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index f322e92c6c8c..22f074cf9897 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2740,13 +2740,17 @@ private[spark] object Utils extends Logging { /** * Safer than Class obj's getSimpleName which may throw Malformed class name error in scala. - * This method mimicks scalatest's getSimpleNameOfAnObjectsClass. + * This method mimics scalatest's getSimpleNameOfAnObjectsClass. */ def getSimpleName(cls: Class[_]): String = { try { - return cls.getSimpleName + cls.getSimpleName } catch { - case err: InternalError => return stripDollars(stripPackages(cls.getName)) + // TODO: the value returned here isn't even quite right; it returns simple names + // like UtilsSuite$MalformedClassObject$MalformedClass instead of MalformedClass + // The exact value may not matter much as it's used in log statements + case _: InternalError => + stripDollars(stripPackages(cls.getName)) } } diff --git a/core/src/test/scala/org/apache/spark/metrics/source/AccumulatorSourceSuite.scala b/core/src/test/scala/org/apache/spark/metrics/source/AccumulatorSourceSuite.scala index 6a6c07cb068c..45e6e0b4913e 100644 --- a/core/src/test/scala/org/apache/spark/metrics/source/AccumulatorSourceSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/source/AccumulatorSourceSuite.scala @@ -17,9 +17,8 @@ package org.apache.spark.metrics.source -import com.codahale.metrics.MetricRegistry import org.mockito.ArgumentCaptor -import org.mockito.Mockito.{mock, never, spy, times, verify, when} +import org.mockito.Mockito.{mock, times, verify, when} import org.apache.spark.{SparkContext, SparkEnv, SparkFunSuite} import org.apache.spark.metrics.MetricsSystem @@ -37,7 +36,7 @@ class AccumulatorSourceSuite extends SparkFunSuite { val accs = Map("my-accumulator-1" -> acc1, "my-accumulator-2" -> acc2) LongAccumulatorSource.register(mockContext, accs) - val captor = new ArgumentCaptor[AccumulatorSource]() + val captor = ArgumentCaptor.forClass(classOf[AccumulatorSource]) verify(mockMetricSystem, times(1)).registerSource(captor.capture()) val source = captor.getValue() val gauges = source.metricRegistry.getGauges() @@ -59,7 +58,7 @@ class AccumulatorSourceSuite extends SparkFunSuite { val accs = Map("my-accumulator-1" -> acc1, "my-accumulator-2" -> acc2) LongAccumulatorSource.register(mockContext, accs) - val captor = new ArgumentCaptor[AccumulatorSource]() + val captor = ArgumentCaptor.forClass(classOf[AccumulatorSource]) verify(mockMetricSystem, times(1)).registerSource(captor.capture()) val source = captor.getValue() val gauges = source.metricRegistry.getGauges() @@ -81,7 +80,7 @@ class AccumulatorSourceSuite extends SparkFunSuite { "my-accumulator-1" -> acc1, "my-accumulator-2" -> acc2) DoubleAccumulatorSource.register(mockContext, accs) - val captor = new ArgumentCaptor[AccumulatorSource]() + val captor = ArgumentCaptor.forClass(classOf[AccumulatorSource]) verify(mockMetricSystem, 
times(1)).registerSource(captor.capture()) val source = captor.getValue() val gauges = source.metricRegistry.getGauges() diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 303ca7cb8801..b88f25726fc4 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -761,13 +761,13 @@ private[spark] object JsonProtocolSuite extends Assertions { } private def assertJsonStringEquals(expected: String, actual: String, metadata: String) { - val expectedJson = pretty(parse(expected)) - val actualJson = pretty(parse(actual)) + val expectedJson = parse(expected) + val actualJson = parse(actual) if (expectedJson != actualJson) { // scalastyle:off // This prints something useful if the JSON strings don't match - println("=== EXPECTED ===\n" + expectedJson + "\n") - println("=== ACTUAL ===\n" + actualJson + "\n") + println(s"=== EXPECTED ===\n${pretty(expectedJson)}\n") + println(s"=== ACTUAL ===\n${pretty(actualJson)}\n") // scalastyle:on throw new TestFailedException(s"$metadata JSON did not equal", 1) } @@ -807,7 +807,13 @@ private[spark] object JsonProtocolSuite extends Assertions { } private def assertStackTraceElementEquals(ste1: StackTraceElement, ste2: StackTraceElement) { - assert(ste1 === ste2) + // This mimics the equals() method from Java 8 and earlier. Java 9 adds checks for + // class loader and module, which will cause them to be not equal, when we don't + // care about those + assert(ste1.getClassName === ste2.getClassName) + assert(ste1.getMethodName === ste2.getMethodName) + assert(ste1.getLineNumber === ste2.getLineNumber) + assert(ste1.getFileName === ste2.getFileName) } /** ----------------------------------- * diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index b2ff1cce3eb0..d3f94fbe05d7 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -1156,22 +1156,6 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { } } - object MalformedClassObject { - class MalformedClass - } - - test("Safe getSimpleName") { - // getSimpleName on class of MalformedClass will result in error: Malformed class name - // Utils.getSimpleName works - val err = intercept[java.lang.InternalError] { - classOf[MalformedClassObject.MalformedClass].getSimpleName - } - assert(err.getMessage === "Malformed class name") - - assert(Utils.getSimpleName(classOf[MalformedClassObject.MalformedClass]) === - "UtilsSuite$MalformedClassObject$MalformedClass") - } - test("stringHalfWidth") { // scalastyle:off nonascii assert(Utils.stringHalfWidth(null) == 0) diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java index b343094b2e7b..e694e9066f12 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java @@ -158,7 +158,7 @@ public void testPySparkLauncher() throws Exception { Map env = new HashMap<>(); List cmd = buildCommand(sparkSubmitArgs, env); - assertEquals("python", cmd.get(cmd.size() - 1)); + assertTrue(Arrays.asList("python", "python2", "python3").contains(cmd.get(cmd.size() - 
1))); assertEquals( String.format("\"%s\" \"foo\" \"%s\" \"bar\" \"%s\"", parser.MASTER, parser.DEPLOY_MODE, SparkSubmitCommandBuilder.PYSPARK_SHELL_RESOURCE), diff --git a/pom.xml b/pom.xml index 321de209a56a..a433659cd200 100644 --- a/pom.xml +++ b/pom.xml @@ -2060,6 +2060,8 @@ ${scala.version} + true + true incremental true From d40654861b1a051d4cfc4ec0d8e80f817e6b8e8b Mon Sep 17 00:00:00 2001 From: seancxmao Date: Wed, 2 Jan 2019 15:45:14 -0600 Subject: [PATCH 0216/1072] [SPARK-26277][SQL][TEST] WholeStageCodegen metrics should be tested with whole-stage codegen enabled ## What changes were proposed in this pull request? In `org.apache.spark.sql.execution.metric.SQLMetricsSuite`, there's a test case named "WholeStageCodegen metrics". However, it is executed with whole-stage codegen disabled. This PR fixes this by enable whole-stage codegen for this test case. ## How was this patch tested? Tested locally using exiting test cases. Closes #23224 from seancxmao/codegen-metrics. Authored-by: seancxmao Signed-off-by: Sean Owen --- .../spark/sql/execution/metric/SQLMetricsSuite.scala | 11 ++++++++--- .../sql/execution/metric/SQLMetricsTestUtils.scala | 7 +++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 7368a6c9e1d6..6174ec4c8908 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -77,11 +77,16 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared } test("WholeStageCodegen metrics") { - // Assume the execution plan is - // WholeStageCodegen(nodeId = 0, Range(nodeId = 2) -> Filter(nodeId = 1)) + // Assume the execution plan with node id is + // WholeStageCodegen(nodeId = 0) + // Filter(nodeId = 1) + // Range(nodeId = 2) // TODO: update metrics in generated operators val ds = spark.range(10).filter('id < 5) - testSparkPlanMetrics(ds.toDF(), 1, Map.empty) + testSparkPlanMetricsWithPredicates(ds.toDF(), 1, Map( + 0L -> (("WholeStageCodegen", Map( + "duration total (min, med, max)" -> {_.toString.matches(timingMetricPattern)}))) + ), true) } test("Aggregate metrics") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala index 2d245d2ba1e3..0e13f7dd55ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala @@ -144,6 +144,7 @@ trait SQLMetricsTestUtils extends SQLTestUtils { * @param df `DataFrame` to run * @param expectedNumOfJobs number of jobs that will run * @param expectedNodeIds the node ids of the metrics to collect from execution data. + * @param enableWholeStage enable whole-stage code generation or not. */ protected def getSparkPlanMetrics( df: DataFrame, @@ -210,13 +211,15 @@ trait SQLMetricsTestUtils extends SQLTestUtils { * @param expectedNumOfJobs number of jobs that will run * @param expectedMetricsPredicates the expected metrics predicates. The format is * `nodeId -> (operatorName, metric name -> metric predicate)`. + * @param enableWholeStage enable whole-stage code generation or not. 
*/ protected def testSparkPlanMetricsWithPredicates( df: DataFrame, expectedNumOfJobs: Int, - expectedMetricsPredicates: Map[Long, (String, Map[String, Any => Boolean])]): Unit = { + expectedMetricsPredicates: Map[Long, (String, Map[String, Any => Boolean])], + enableWholeStage: Boolean = false): Unit = { val optActualMetrics = - getSparkPlanMetrics(df, expectedNumOfJobs, expectedMetricsPredicates.keySet) + getSparkPlanMetrics(df, expectedNumOfJobs, expectedMetricsPredicates.keySet, enableWholeStage) optActualMetrics.foreach { actualMetrics => assert(expectedMetricsPredicates.keySet === actualMetrics.keySet) for ((nodeId, (expectedNodeName, expectedMetricsPredicatesMap)) From 8be4d24a27a1e9995a53d4efb3a13a47813d1f77 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 2 Jan 2019 16:57:10 -0800 Subject: [PATCH 0217/1072] [SPARK-26023][SQL][FOLLOWUP] Dumping truncated plans and generated code to a file ## What changes were proposed in this pull request? `DataSourceScanExec` overrides "wrong" `treeString` method without `append`. In the PR, I propose to make `treeString`s **final** to prevent such mistakes in the future. And removed the `treeString` and `verboseString` since they both use `simpleString` with reduction. ## How was this patch tested? It was tested by `DataSourceScanExecRedactionSuite` Closes #23431 from MaxGekk/datasource-scan-exec-followup. Authored-by: Maxim Gekk Signed-off-by: gatorsmile --- .../org/apache/spark/sql/catalyst/trees/TreeNode.scala | 4 ++-- .../apache/spark/sql/execution/DataSourceScanExec.scala | 9 ++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 570a019b2af7..d214ebb30903 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -474,9 +474,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { override def toString: String = treeString /** Returns a string representation of the nodes in this tree */ - def treeString: String = treeString(verbose = true) + final def treeString: String = treeString(verbose = true) - def treeString( + final def treeString( verbose: Boolean, addSuffix: Boolean = false, maxFields: Int = SQLConf.get.maxToStringFields): String = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 1d7dd73706c4..8b84eda36103 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -58,13 +58,8 @@ trait DataSourceScanExec extends LeafExecNode with CodegenSupport { key + ": " + StringUtils.abbreviate(redact(value), 100) } val metadataStr = truncatedString(metadataEntries, " ", ", ", "", maxFields) - s"$nodeNamePrefix$nodeName${truncatedString(output, "[", ",", "]", maxFields)}$metadataStr" - } - - override def verboseString(maxFields: Int): String = redact(super.verboseString(maxFields)) - - override def treeString(verbose: Boolean, addSuffix: Boolean, maxFields: Int): String = { - redact(super.treeString(verbose, addSuffix, maxFields)) + redact( + s"$nodeNamePrefix$nodeName${truncatedString(output, "[", ",", "]", maxFields)}$metadataStr") } /** From 
56967b7e288ac54e705b14a21516df5402d4c9d9 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 3 Jan 2019 11:01:54 +0800 Subject: [PATCH 0218/1072] [SPARK-26403][SQL] Support pivoting using array column for `pivot(column)` API ## What changes were proposed in this pull request? This PR fixes `pivot(Column)` can accepts `collection.mutable.WrappedArray`. Note that we return `collection.mutable.WrappedArray` from `ArrayType`, and `Literal.apply` doesn't support this. We can unwrap the array and use it for type dispatch. ```scala val df = Seq( (2, Seq.empty[String]), (2, Seq("a", "x")), (3, Seq.empty[String]), (3, Seq("a", "x"))).toDF("x", "s") df.groupBy("x").pivot("s").count().show() ``` Before: ``` Unsupported literal type class scala.collection.mutable.WrappedArray$ofRef WrappedArray() java.lang.RuntimeException: Unsupported literal type class scala.collection.mutable.WrappedArray$ofRef WrappedArray() at org.apache.spark.sql.catalyst.expressions.Literal$.apply(literals.scala:80) at org.apache.spark.sql.RelationalGroupedDataset.$anonfun$pivot$2(RelationalGroupedDataset.scala:427) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:237) at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36) at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33) at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:39) at scala.collection.TraversableLike.map(TraversableLike.scala:237) at scala.collection.TraversableLike.map$(TraversableLike.scala:230) at scala.collection.AbstractTraversable.map(Traversable.scala:108) at org.apache.spark.sql.RelationalGroupedDataset.pivot(RelationalGroupedDataset.scala:425) at org.apache.spark.sql.RelationalGroupedDataset.pivot(RelationalGroupedDataset.scala:406) at org.apache.spark.sql.RelationalGroupedDataset.pivot(RelationalGroupedDataset.scala:317) at org.apache.spark.sql.DataFramePivotSuite.$anonfun$new$1(DataFramePivotSuite.scala:341) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) ``` After: ``` +---+---+------+ | x| []|[a, x]| +---+---+------+ | 3| 1| 1| | 2| 1| 1| +---+---+------+ ``` ## How was this patch tested? Manually tested and unittests were added. Closes #23349 from HyukjinKwon/SPARK-26403. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .../spark/sql/catalyst/expressions/literals.scala | 1 + .../catalyst/expressions/LiteralExpressionSuite.scala | 2 ++ .../org/apache/spark/sql/DataFramePivotSuite.scala | 11 +++++++++++ 3 files changed, 14 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 34d252886ffb..48beffa18a55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -67,6 +67,7 @@ object Literal { case t: Timestamp => Literal(DateTimeUtils.fromJavaTimestamp(t), TimestampType) case d: Date => Literal(DateTimeUtils.fromJavaDate(d), DateType) case a: Array[Byte] => Literal(a, BinaryType) + case a: collection.mutable.WrappedArray[_] => apply(a.array) case a: Array[_] => val elementType = componentTypeToDataType(a.getClass.getComponentType()) val dataType = ArrayType(elementType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index 3ea6bfac9ddc..133aaa449ea4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -179,6 +179,8 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { checkArrayLiteral(Array("a", "b", "c")) checkArrayLiteral(Array(1.0, 4.0)) checkArrayLiteral(Array(CalendarInterval.MICROS_PER_DAY, CalendarInterval.MICROS_PER_HOUR)) + val arr = collection.mutable.WrappedArray.make(Array(1.0, 4.0)) + checkEvaluation(Literal(arr), toCatalyst(arr)) } test("seq") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala index b52ca58c07d2..8c2c11be9b6f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala @@ -333,4 +333,15 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext { } assert(exception.getMessage.contains("Unsupported literal type")) } + + test("SPARK-26403: pivoting by array column") { + val df = Seq( + (2, Seq.empty[String]), + (2, Seq("a", "x")), + (3, Seq.empty[String]), + (3, Seq("a", "x"))).toDF("x", "s") + val expected = Seq((3, 1, 1), (2, 1, 1)).toDF + val actual = df.groupBy("x").pivot("s").count() + checkAnswer(actual, expected) + } } From 2a30deb85ae4e42c5cbc936383dd5c3970f4a74f Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 3 Jan 2019 11:27:40 +0100 Subject: [PATCH 0219/1072] [SPARK-26502][SQL] Move hiveResultString() from QueryExecution to HiveResult ## What changes were proposed in this pull request? In the PR, I propose to move `hiveResultString()` out of `QueryExecution` and put it to a separate object. Closes #23409 from MaxGekk/hive-result-string. 
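For callers, the refactor only changes where the helper lives; a hedged caller-side sketch, assuming an already-built `DataFrame` named `df`:
```scala
// Sketch of the new call shape (df is an assumed, existing DataFrame).
import org.apache.spark.sql.execution.HiveResult.hiveResultString

val rendered: Seq[String] = hiveResultString(df.queryExecution.executedPlan)
rendered.foreach(println)  // Hive-style, tab-delimited rows
```
The call sites updated below (`SQLQueryTestSuite`, `SparkSQLDriver`, `HiveComparisonTest`) follow this same pattern.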
Lead-authored-by: Maxim Gekk Co-authored-by: Maxim Gekk Signed-off-by: Herman van Hovell --- .../spark/sql/execution/HiveResult.scala | 116 ++++++++++++++++++ .../spark/sql/execution/QueryExecution.scala | 91 +------------- .../apache/spark/sql/SQLQueryTestSuite.scala | 5 +- .../hive/thriftserver/SparkSQLDriver.scala | 3 +- .../apache/spark/sql/hive/test/TestHive.scala | 2 +- .../hive/execution/HiveComparisonTest.scala | 4 +- 6 files changed, 126 insertions(+), 95 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala new file mode 100644 index 000000000000..22d3ca958a21 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import java.nio.charset.StandardCharsets +import java.sql.{Date, Timestamp} + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.command.{DescribeTableCommand, ExecutedCommandExec, ShowTablesCommand} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +/** + * Runs a query returning the result in Hive compatible form. + */ +object HiveResult { + /** + * Returns the result as a hive compatible sequence of strings. This is used in tests and + * `SparkSQLDriver` for CLI applications. + */ + def hiveResultString(executedPlan: SparkPlan): Seq[String] = executedPlan match { + case ExecutedCommandExec(desc: DescribeTableCommand) => + // If it is a describe command for a Hive table, we want to have the output format + // be similar with Hive. + executedPlan.executeCollectPublic().map { + case Row(name: String, dataType: String, comment) => + Seq(name, dataType, + Option(comment.asInstanceOf[String]).getOrElse("")) + .map(s => String.format(s"%-20s", s)) + .mkString("\t") + } + // SHOW TABLES in Hive only output table names, while ours output database, table name, isTemp. + case command @ ExecutedCommandExec(s: ShowTablesCommand) if !s.isExtended => + command.executeCollect().map(_.getString(1)) + case other => + val result: Seq[Seq[Any]] = other.executeCollectPublic().map(_.toSeq).toSeq + // We need the types so we can output struct field names + val types = executedPlan.output.map(_.dataType) + // Reformat to match hive tab delimited output. + result.map(_.zip(types).map(toHiveString)).map(_.mkString("\t")) + } + + /** Formats a datum (based on the given data type) and returns the string representation. 
*/ + private def toHiveString(a: (Any, DataType)): String = { + val primitiveTypes = Seq(StringType, IntegerType, LongType, DoubleType, FloatType, + BooleanType, ByteType, ShortType, DateType, TimestampType, BinaryType) + val timeZone = DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone) + + def formatDecimal(d: java.math.BigDecimal): String = { + if (d.compareTo(java.math.BigDecimal.ZERO) == 0) { + java.math.BigDecimal.ZERO.toPlainString + } else { + d.stripTrailingZeros().toPlainString + } + } + + /** Hive outputs fields of structs slightly differently than top level attributes. */ + def toHiveStructString(a: (Any, DataType)): String = a match { + case (struct: Row, StructType(fields)) => + struct.toSeq.zip(fields).map { + case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" + }.mkString("{", ",", "}") + case (seq: Seq[_], ArrayType(typ, _)) => + seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") + case (map: Map[_, _], MapType(kType, vType, _)) => + map.map { + case (key, value) => + toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) + }.toSeq.sorted.mkString("{", ",", "}") + case (null, _) => "null" + case (s: String, StringType) => "\"" + s + "\"" + case (decimal, DecimalType()) => decimal.toString + case (interval, CalendarIntervalType) => interval.toString + case (other, tpe) if primitiveTypes contains tpe => other.toString + } + + a match { + case (struct: Row, StructType(fields)) => + struct.toSeq.zip(fields).map { + case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" + }.mkString("{", ",", "}") + case (seq: Seq[_], ArrayType(typ, _)) => + seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") + case (map: Map[_, _], MapType(kType, vType, _)) => + map.map { + case (key, value) => + toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) + }.toSeq.sorted.mkString("{", ",", "}") + case (null, _) => "NULL" + case (d: Date, DateType) => + DateTimeUtils.dateToString(DateTimeUtils.fromJavaDate(d)) + case (t: Timestamp, TimestampType) => + DateTimeUtils.timestampToString(DateTimeUtils.fromJavaTimestamp(t), timeZone) + case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) + case (decimal: java.math.BigDecimal, DecimalType()) => formatDecimal(decimal) + case (interval, CalendarIntervalType) => interval.toString + case (other, tpe) if primitiveTypes.contains(tpe) => other.toString + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 7fccbf65d852..72499aa936a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -18,25 +18,20 @@ package org.apache.spark.sql.execution import java.io.{BufferedWriter, OutputStreamWriter} -import java.nio.charset.StandardCharsets -import java.sql.{Date, Timestamp} import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{AnalysisException, Row, SparkSession} +import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.{InternalRow, QueryPlanningTracker} import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer} import 
org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.catalyst.util.truncatedString -import org.apache.spark.sql.execution.command.{DescribeTableCommand, ExecutedCommandExec, ShowTablesCommand} import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReuseExchange} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{BinaryType, DateType, DecimalType, TimestampType, _} import org.apache.spark.util.Utils /** @@ -109,90 +104,6 @@ class QueryExecution( ReuseExchange(sparkSession.sessionState.conf), ReuseSubquery(sparkSession.sessionState.conf)) - /** - * Returns the result as a hive compatible sequence of strings. This is used in tests and - * `SparkSQLDriver` for CLI applications. - */ - def hiveResultString(): Seq[String] = executedPlan match { - case ExecutedCommandExec(desc: DescribeTableCommand) => - // If it is a describe command for a Hive table, we want to have the output format - // be similar with Hive. - desc.run(sparkSession).map { - case Row(name: String, dataType: String, comment) => - Seq(name, dataType, - Option(comment.asInstanceOf[String]).getOrElse("")) - .map(s => String.format(s"%-20s", s)) - .mkString("\t") - } - // SHOW TABLES in Hive only output table names, while ours output database, table name, isTemp. - case command @ ExecutedCommandExec(s: ShowTablesCommand) if !s.isExtended => - command.executeCollect().map(_.getString(1)) - case other => - val result: Seq[Seq[Any]] = other.executeCollectPublic().map(_.toSeq).toSeq - // We need the types so we can output struct field names - val types = analyzed.output.map(_.dataType) - // Reformat to match hive tab delimited output. - result.map(_.zip(types).map(toHiveString)).map(_.mkString("\t")) - } - - /** Formats a datum (based on the given data type) and returns the string representation. */ - private def toHiveString(a: (Any, DataType)): String = { - val primitiveTypes = Seq(StringType, IntegerType, LongType, DoubleType, FloatType, - BooleanType, ByteType, ShortType, DateType, TimestampType, BinaryType) - - def formatDecimal(d: java.math.BigDecimal): String = { - if (d.compareTo(java.math.BigDecimal.ZERO) == 0) { - java.math.BigDecimal.ZERO.toPlainString - } else { - d.stripTrailingZeros().toPlainString - } - } - - /** Hive outputs fields of structs slightly differently than top level attributes. 
*/ - def toHiveStructString(a: (Any, DataType)): String = a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "null" - case (s: String, StringType) => "\"" + s + "\"" - case (decimal, DecimalType()) => decimal.toString - case (interval, CalendarIntervalType) => interval.toString - case (other, tpe) if primitiveTypes contains tpe => other.toString - } - - a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "NULL" - case (d: Date, DateType) => - DateTimeUtils.dateToString(DateTimeUtils.fromJavaDate(d)) - case (t: Timestamp, TimestampType) => - DateTimeUtils.timestampToString(DateTimeUtils.fromJavaTimestamp(t), - DateTimeUtils.getTimeZone(sparkSession.sessionState.conf.sessionLocalTimeZone)) - case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) - case (decimal: java.math.BigDecimal, DecimalType()) => formatDecimal(decimal) - case (interval, CalendarIntervalType) => interval.toString - case (other, tpe) if primitiveTypes.contains(tpe) => other.toString - } - } - def simpleString: String = withRedaction { val concat = new StringConcat() concat.append("== Physical Plan ==\n") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index b2515226d9a1..24b312348bd6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -22,11 +22,11 @@ import java.util.{Locale, TimeZone} import scala.util.control.NonFatal -import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} +import org.apache.spark.sql.execution.HiveResult.hiveResultString import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeTableCommand} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext @@ -287,7 +287,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { val schema = df.schema val notIncludedMsg = "[not included in comparison]" // Get answer, but also get rid of the #1234 expression ids that show up in explain plans - val answer = df.queryExecution.hiveResultString().map(_.replaceAll("#\\d+", "#x") + val answer = hiveResultString(df.queryExecution.executedPlan) + .map(_.replaceAll("#\\d+", "#x") .replaceAll("Location.*/sql/core/", 
s"Location ${notIncludedMsg}sql/core/") .replaceAll("Created By.*", s"Created By $notIncludedMsg") .replaceAll("Created Time.*", s"Created Time $notIncludedMsg") diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala index 677590217344..960fdd11db15 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SQLContext} import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} +import org.apache.spark.sql.execution.HiveResult.hiveResultString private[hive] class SparkSQLDriver(val context: SQLContext = SparkSQLEnv.sqlContext) @@ -61,7 +62,7 @@ private[hive] class SparkSQLDriver(val context: SQLContext = SparkSQLEnv.sqlCont context.sparkContext.setJobDescription(command) val execution = context.sessionState.executePlan(context.sql(command).logicalPlan) hiveResponse = SQLExecution.withNewExecutionId(context.sparkSession, execution) { - execution.hiveResultString() + hiveResultString(execution.executedPlan) } tableSchema = getResultSetSchema(execution) new CommandProcessorResponse(0) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 3508affda241..4c2bc62b9faf 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -297,7 +297,7 @@ private[hive] class TestHiveSparkSession( protected[hive] implicit class SqlCmd(sql: String) { def cmd: () => Unit = { - () => new TestHiveQueryExecution(sql).hiveResultString(): Unit + () => new TestHiveQueryExecution(sql).executedPlan.executeCollect(): Unit } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 272e6f51f500..66426824573c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.execution.HiveResult.hiveResultString import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.hive.test.{TestHive, TestHiveQueryExecution} @@ -345,7 +346,8 @@ abstract class HiveComparisonTest val catalystResults = queryList.zip(hiveResults).map { case (queryString, hive) => val query = new TestHiveQueryExecution(queryString.replace("../../data", testDataPath)) def getResult(): Seq[String] = { - SQLExecution.withNewExecutionId(query.sparkSession, query)(query.hiveResultString()) + SQLExecution.withNewExecutionId( + query.sparkSession, query)(hiveResultString(query.executedPlan)) } try { (query, prepareAnswer(query, getResult())) } catch { case e: Throwable => From 
88b074f3f06ddd236d63e8bf31edebe1d3e94fe4 Mon Sep 17 00:00:00 2001 From: Liupengcheng Date: Thu, 3 Jan 2019 10:26:14 -0600 Subject: [PATCH 0220/1072] [SPARK-26501][CORE][TEST] Fix unexpected overriden of exitFn in SparkSubmitSuite ## What changes were proposed in this pull request? The overriden of SparkSubmit's exitFn at some previous tests in SparkSubmitSuite may cause the following tests pass even they failed when they were run separately. This PR is to fix this problem. ## How was this patch tested? unittest Closes #23404 from liupc/Fix-SparkSubmitSuite-exitFn. Authored-by: Liupengcheng Signed-off-by: Sean Owen --- .../spark/deploy/SparkSubmitSuite.scala | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index a8973d1b60f8..2a7a55cbb903 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -72,27 +72,31 @@ trait TestPrematureExit { mainObject.printStream = printStream @volatile var exitedCleanly = false + val original = mainObject.exitFn mainObject.exitFn = (_) => exitedCleanly = true - - @volatile var exception: Exception = null - val thread = new Thread { - override def run() = try { - mainObject.main(input) - } catch { - // Capture the exception to check whether the exception contains searchString or not - case e: Exception => exception = e + try { + @volatile var exception: Exception = null + val thread = new Thread { + override def run() = try { + mainObject.main(input) + } catch { + // Capture the exception to check whether the exception contains searchString or not + case e: Exception => exception = e + } } - } - thread.start() - thread.join() - if (exitedCleanly) { - val joined = printStream.lineBuffer.mkString("\n") - assert(joined.contains(searchString)) - } else { - assert(exception != null) - if (!exception.getMessage.contains(searchString)) { - throw exception + thread.start() + thread.join() + if (exitedCleanly) { + val joined = printStream.lineBuffer.mkString("\n") + assert(joined.contains(searchString)) + } else { + assert(exception != null) + if (!exception.getMessage.contains(searchString)) { + throw exception + } } + } finally { + mainObject.exitFn = original } } } From 40711eef168716c44b873359e17822fe6b3387f4 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 3 Jan 2019 10:30:47 -0600 Subject: [PATCH 0221/1072] [SPARK-26517][SQL][TEST] Avoid duplicate test in ParquetSchemaPruningSuite ## What changes were proposed in this pull request? `testExactCaseQueryPruning` and `testMixedCaseQueryPruning` don't need to set up `PARQUET_VECTORIZED_READER_ENABLED` config. Because `withMixedCaseData` will run against both Spark vectorized reader and Parquet-mr reader. ## How was this patch tested? Existing test. Closes #23427 from viirya/fix-parquet-schema-pruning-test. 
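The explicit config override is redundant because the shared helper already runs each test body under both readers; a hedged sketch of that pattern (the helper and the config-setter below are illustrative stand-ins, not the suite's actual API):
```scala
// Illustrative pattern only: run the same test body once per Parquet reader.
def runWithBothParquetReaders(setSqlConf: (String, String) => Unit)(body: => Unit): Unit = {
  Seq("true", "false").foreach { vectorized =>
    // toggles between the vectorized reader and the Parquet-mr reader
    setSqlConf("spark.sql.parquet.enableVectorizedReader", vectorized)
    body
  }
}
```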
Authored-by: Liang-Chi Hsieh Signed-off-by: Sean Owen --- .../parquet/ParquetSchemaPruningSuite.scala | 23 ++++--------------- 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala index 434c4414edeb..9a02529a2550 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala @@ -312,15 +312,8 @@ class ParquetSchemaPruningSuite // schema's column and field names. N.B. this implies that `testThunk` should pass using either a // case-sensitive or case-insensitive query parser private def testExactCaseQueryPruning(testName: String)(testThunk: => Unit) { - test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") { - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", - SQLConf.CASE_SENSITIVE.key -> "true") { - withMixedCaseData(testThunk) - } - } - test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") { - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", - SQLConf.CASE_SENSITIVE.key -> "true") { + test(s"Case-sensitive parser - mixed-case schema - $testName") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { withMixedCaseData(testThunk) } } @@ -330,20 +323,14 @@ class ParquetSchemaPruningSuite // Tests schema pruning for a query whose column and field names may differ in case from the table // schema's column and field names private def testMixedCaseQueryPruning(testName: String)(testThunk: => Unit) { - test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") { - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", - SQLConf.CASE_SENSITIVE.key -> "false") { - withMixedCaseData(testThunk) - } - } - test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") { - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", - SQLConf.CASE_SENSITIVE.key -> "false") { + test(s"Case-insensitive parser - mixed-case schema - $testName") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { withMixedCaseData(testThunk) } } } + // Tests given test function with Spark vectorized reader and Parquet-mr reader. private def withMixedCaseData(testThunk: => Unit) { withParquetTable(mixedCaseData, "mixedcase") { testThunk From e2dbafdbc5e50fcf2554bf51939ce0cd363d8806 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 4 Jan 2019 00:37:03 +0800 Subject: [PATCH 0222/1072] [SPARK-26447][SQL] Allow OrcColumnarBatchReader to return less partition columns ## What changes were proposed in this pull request? Currently OrcColumnarBatchReader returns all the partition column values in the batch read. In data source V2, we can improve it by returning the required partition column values only. This PR is part of https://github.com/apache/spark/pull/23383 . As cloud-fan suggested, create a new PR to make review easier. Also, this PR doesn't improve `OrcFileFormat`, since in the method `buildReaderWithPartitionValues`, the `requiredSchema` filter out all the partition columns, so we can't know which partition column is required. ## How was this patch tested? Unit test Closes #23387 from gengliangwang/refactorOrcColumnarBatch. 
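To make the id bookkeeping concrete, here is a hedged sketch of how the two arrays passed to `initBatch` line up for a result schema of `[col1, col2, p1, p2]`, where `col1`/`col2` come from the ORC file and `p1`/`p2` from the partition directory (values mirror the new unit test below):
```scala
// -1 means "not taken from this source": data ids carry -1 in partition slots,
// partition ids carry -1 in data slots.
val requestedColIds = Array(0, 1)      // positions of col1, col2 in the file schema
val numPartitionCols = 2               // p1, p2
val requestedDataColIds =
  requestedColIds ++ Array.fill(numPartitionCols)(-1)                   // Array(0, 1, -1, -1)
val requestedPartitionColIds =
  Array.fill(requestedColIds.length)(-1) ++ Range(0, numPartitionCols)  // Array(-1, -1, 0, 1)
```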
Lead-authored-by: Gengliang Wang Co-authored-by: Gengliang Wang Co-authored-by: Dongjoon Hyun Signed-off-by: Wenchen Fan --- .../orc/OrcColumnarBatchReader.java | 93 ++++++++++--------- .../datasources/orc/OrcFileFormat.scala | 10 +- .../orc/OrcColumnarBatchReaderSuite.scala | 80 ++++++++++++++++ 3 files changed, 136 insertions(+), 47 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java index a0d9578a377b..7dc90df05a8f 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.stream.IntStream; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; @@ -58,9 +59,14 @@ public class OrcColumnarBatchReader extends RecordReader { /** * The column IDs of the physical ORC file schema which are required by this reader. - * -1 means this required column doesn't exist in the ORC file. + * -1 means this required column is partition column, or it doesn't exist in the ORC file. + * Ideally partition column should never appear in the physical file, and should only appear + * in the directory name. However, Spark allows partition columns inside physical file, + * but Spark will discard the values from the file, and use the partition value got from + * directory name. The column order will be reserved though. */ - private int[] requestedColIds; + @VisibleForTesting + public int[] requestedDataColIds; // Record reader from ORC row batch. private org.apache.orc.RecordReader recordReader; @@ -68,7 +74,8 @@ public class OrcColumnarBatchReader extends RecordReader { private StructField[] requiredFields; // The result columnar batch for vectorized execution by whole-stage codegen. - private ColumnarBatch columnarBatch; + @VisibleForTesting + public ColumnarBatch columnarBatch; // Writable column vectors of the result columnar batch. private WritableColumnVector[] columnVectors; @@ -143,25 +150,33 @@ public void initialize( /** * Initialize columnar batch by setting required schema and partition information. * With this information, this creates ColumnarBatch with the full schema. + * + * @param orcSchema Schema from ORC file reader. + * @param requiredFields All the fields that are required to return, including partition fields. + * @param requestedDataColIds Requested column ids from orcSchema. -1 if not existed. + * @param requestedPartitionColIds Requested column ids from partition schema. -1 if not existed. + * @param partitionValues Values of partition columns. */ public void initBatch( TypeDescription orcSchema, - int[] requestedColIds, StructField[] requiredFields, - StructType partitionSchema, + int[] requestedDataColIds, + int[] requestedPartitionColIds, InternalRow partitionValues) { batch = orcSchema.createRowBatch(capacity); assert(!batch.selectedInUse); // `selectedInUse` should be initialized with `false`. 
- + assert(requiredFields.length == requestedDataColIds.length); + assert(requiredFields.length == requestedPartitionColIds.length); + // If a required column is also partition column, use partition value and don't read from file. + for (int i = 0; i < requiredFields.length; i++) { + if (requestedPartitionColIds[i] != -1) { + requestedDataColIds[i] = -1; + } + } this.requiredFields = requiredFields; - this.requestedColIds = requestedColIds; - assert(requiredFields.length == requestedColIds.length); + this.requestedDataColIds = requestedDataColIds; StructType resultSchema = new StructType(requiredFields); - for (StructField f : partitionSchema.fields()) { - resultSchema = resultSchema.add(f); - } - if (copyToSpark) { if (MEMORY_MODE == MemoryMode.OFF_HEAP) { columnVectors = OffHeapColumnVector.allocateColumns(capacity, resultSchema); @@ -169,22 +184,18 @@ public void initBatch( columnVectors = OnHeapColumnVector.allocateColumns(capacity, resultSchema); } - // Initialize the missing columns once. + // Initialize the partition columns and missing columns once. for (int i = 0; i < requiredFields.length; i++) { - if (requestedColIds[i] == -1) { + if (requestedPartitionColIds[i] != -1) { + ColumnVectorUtils.populate(columnVectors[i], + partitionValues, requestedPartitionColIds[i]); + columnVectors[i].setIsConstant(); + } else if (requestedDataColIds[i] == -1) { columnVectors[i].putNulls(0, capacity); columnVectors[i].setIsConstant(); } } - if (partitionValues.numFields() > 0) { - int partitionIdx = requiredFields.length; - for (int i = 0; i < partitionValues.numFields(); i++) { - ColumnVectorUtils.populate(columnVectors[i + partitionIdx], partitionValues, i); - columnVectors[i + partitionIdx].setIsConstant(); - } - } - columnarBatch = new ColumnarBatch(columnVectors); } else { // Just wrap the ORC column vector instead of copying it to Spark column vector. @@ -192,26 +203,22 @@ public void initBatch( for (int i = 0; i < requiredFields.length; i++) { DataType dt = requiredFields[i].dataType(); - int colId = requestedColIds[i]; - // Initialize the missing columns once. - if (colId == -1) { - OnHeapColumnVector missingCol = new OnHeapColumnVector(capacity, dt); - missingCol.putNulls(0, capacity); - missingCol.setIsConstant(); - orcVectorWrappers[i] = missingCol; - } else { - orcVectorWrappers[i] = new OrcColumnVector(dt, batch.cols[colId]); - } - } - - if (partitionValues.numFields() > 0) { - int partitionIdx = requiredFields.length; - for (int i = 0; i < partitionValues.numFields(); i++) { - DataType dt = partitionSchema.fields()[i].dataType(); + if (requestedPartitionColIds[i] != -1) { OnHeapColumnVector partitionCol = new OnHeapColumnVector(capacity, dt); - ColumnVectorUtils.populate(partitionCol, partitionValues, i); + ColumnVectorUtils.populate(partitionCol, partitionValues, requestedPartitionColIds[i]); partitionCol.setIsConstant(); - orcVectorWrappers[partitionIdx + i] = partitionCol; + orcVectorWrappers[i] = partitionCol; + } else { + int colId = requestedDataColIds[i]; + // Initialize the missing columns once. 
+ if (colId == -1) { + OnHeapColumnVector missingCol = new OnHeapColumnVector(capacity, dt); + missingCol.putNulls(0, capacity); + missingCol.setIsConstant(); + orcVectorWrappers[i] = missingCol; + } else { + orcVectorWrappers[i] = new OrcColumnVector(dt, batch.cols[colId]); + } } } @@ -233,7 +240,7 @@ private boolean nextBatch() throws IOException { if (!copyToSpark) { for (int i = 0; i < requiredFields.length; i++) { - if (requestedColIds[i] != -1) { + if (requestedDataColIds[i] != -1) { ((OrcColumnVector) orcVectorWrappers[i]).setBatchSize(batchSize); } } @@ -248,8 +255,8 @@ private boolean nextBatch() throws IOException { StructField field = requiredFields[i]; WritableColumnVector toColumn = columnVectors[i]; - if (requestedColIds[i] >= 0) { - ColumnVector fromColumn = batch.cols[requestedColIds[i]]; + if (requestedDataColIds[i] >= 0) { + ColumnVector fromColumn = batch.cols[requestedDataColIds[i]]; if (fromColumn.isRepeating) { putRepeatingValues(batchSize, field, fromColumn, toColumn); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index 4574f8247af5..cd10ad21cd82 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -206,13 +206,15 @@ class OrcFileFormat // after opening a file. val iter = new RecordReaderIterator(batchReader) Option(TaskContext.get()).foreach(_.addTaskCompletionListener[Unit](_ => iter.close())) - + val requestedDataColIds = requestedColIds ++ Array.fill(partitionSchema.length)(-1) + val requestedPartitionColIds = + Array.fill(requiredSchema.length)(-1) ++ Range(0, partitionSchema.length) batchReader.initialize(fileSplit, taskAttemptContext) batchReader.initBatch( reader.getSchema, - requestedColIds, - requiredSchema.fields, - partitionSchema, + resultSchema.fields, + requestedDataColIds, + requestedPartitionColIds, file.partitionValues) iter.asInstanceOf[Iterator[InternalRow]] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala new file mode 100644 index 000000000000..52abeb20e7f2 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.orc + +import org.apache.orc.TypeDescription + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector} +import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils} +import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.unsafe.types.UTF8String.fromString + +class OrcColumnarBatchReaderSuite extends QueryTest with SQLTestUtils with SharedSQLContext { + private val dataSchema = StructType.fromDDL("col1 int, col2 int") + private val partitionSchema = StructType.fromDDL("p1 string, p2 string") + private val partitionValues = InternalRow(fromString("partValue1"), fromString("partValue2")) + private val orcFileSchemaList = Seq( + "struct", "struct", + "struct", "struct") + orcFileSchemaList.foreach { case schema => + val orcFileSchema = TypeDescription.fromString(schema) + + val isConstant = classOf[WritableColumnVector].getDeclaredField("isConstant") + isConstant.setAccessible(true) + + def getReader( + requestedDataColIds: Array[Int], + requestedPartitionColIds: Array[Int], + resultFields: Array[StructField]): OrcColumnarBatchReader = { + val reader = new OrcColumnarBatchReader(false, false, 4096) + reader.initBatch( + orcFileSchema, + resultFields, + requestedDataColIds, + requestedPartitionColIds, + partitionValues) + reader + } + + test(s"all partitions are requested: $schema") { + val requestedDataColIds = Array(0, 1, 0, 0) + val requestedPartitionColIds = Array(-1, -1, 0, 1) + val reader = getReader(requestedDataColIds, requestedPartitionColIds, + dataSchema.fields ++ partitionSchema.fields) + assert(reader.requestedDataColIds === Array(0, 1, -1, -1)) + } + + test(s"initBatch should initialize requested partition columns only: $schema") { + val requestedDataColIds = Array(0, -1) // only `col1` is requested, `col2` doesn't exist + val requestedPartitionColIds = Array(-1, 0) // only `p1` is requested + val reader = getReader(requestedDataColIds, requestedPartitionColIds, + Array(dataSchema.fields(0), partitionSchema.fields(0))) + val batch = reader.columnarBatch + assert(batch.numCols() === 2) + + assert(batch.column(0).isInstanceOf[OrcColumnVector]) + assert(batch.column(1).isInstanceOf[OnHeapColumnVector]) + + val p1 = batch.column(1).asInstanceOf[OnHeapColumnVector] + assert(isConstant.get(p1).asInstanceOf[Boolean]) // Partition column is constant. + assert(p1.getUTF8String(0) === partitionValues.getUTF8String(0)) + } + } +} From 05372d188aeaeff5e8de8866ec6e7b932bafa70f Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Thu, 3 Jan 2019 14:30:27 -0800 Subject: [PATCH 0223/1072] [SPARK-26489][CORE] Use ConfigEntry for hardcoded configs for python/r categories ## What changes were proposed in this pull request? The PR makes hardcoded configs below to use ConfigEntry. * spark.pyspark * spark.python * spark.r This patch doesn't change configs which are not relevant to SparkConf (e.g. system properties, python source code) ## How was this patch tested? Existing tests. Closes #23428 from HeartSaVioR/SPARK-26489. 
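For readers new to `ConfigEntry`, a minimal sketch of the migration pattern is below. It assumes Spark's internal `ConfigBuilder` API (which is `private[spark]`, so the snippet only compiles inside Spark's own source tree); the wrapper object name `PythonConfigSketch` is illustrative and is not the object added by this patch.

```scala
package org.apache.spark.internal.config

// Illustrative sketch of the pattern applied in this patch: a typed entry with a
// default replaces a hardcoded key string scattered across call sites.
private[spark] object PythonConfigSketch {
  val PYTHON_WORKER_REUSE = ConfigBuilder("spark.python.worker.reuse")
    .booleanConf
    .createWithDefault(true)
}

// Call sites then swap string lookups for typed reads:
//   before: conf.getBoolean("spark.python.worker.reuse", true)
//   after:  conf.get(PythonConfigSketch.PYTHON_WORKER_REUSE)
```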
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Marcelo Vanzin --- .../spark/api/python/PythonRunner.scala | 6 +-- .../api/python/PythonWorkerFactory.scala | 15 +++--- .../org/apache/spark/api/r/RBackend.scala | 10 ++-- .../apache/spark/api/r/RBackendHandler.scala | 7 ++- .../org/apache/spark/api/r/RRunner.scala | 8 ++-- .../org/apache/spark/deploy/RRunner.scala | 9 ++-- .../apache/spark/internal/config/Python.scala | 47 +++++++++++++++++++ .../config/R.scala} | 26 ++++++---- .../spark/internal/config/package.scala | 4 -- .../features/BasicExecutorFeatureStep.scala | 1 + .../BasicExecutorFeatureStepSuite.scala | 1 + .../org/apache/spark/deploy/yarn/Client.scala | 1 + .../spark/deploy/yarn/YarnAllocator.scala | 1 + 13 files changed, 96 insertions(+), 40 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/internal/config/Python.scala rename core/src/main/scala/org/apache/spark/{api/r/SparkRDefaults.scala => internal/config/R.scala} (56%) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index f73e95eac8f7..6b748c825d29 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -27,7 +27,7 @@ import scala.collection.JavaConverters._ import org.apache.spark._ import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.PYSPARK_EXECUTOR_MEMORY +import org.apache.spark.internal.config.Python._ import org.apache.spark.security.SocketAuthHelper import org.apache.spark.util._ @@ -71,7 +71,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( private val conf = SparkEnv.get.conf private val bufferSize = conf.getInt("spark.buffer.size", 65536) - private val reuseWorker = conf.getBoolean("spark.python.worker.reuse", true) + private val reuseWorker = conf.get(PYTHON_WORKER_REUSE) // each python worker gets an equal part of the allocation. the worker pool will grow to the // number of concurrent tasks, which is determined by the number of cores in this executor. private val memoryMb = conf.get(PYSPARK_EXECUTOR_MEMORY) @@ -496,7 +496,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( extends Thread(s"Worker Monitor for $pythonExec") { /** How long to wait before killing the python worker if a task cannot be interrupted. */ - private val taskKillTimeout = env.conf.getTimeAsMs("spark.python.task.killTimeout", "2s") + private val taskKillTimeout = env.conf.get(PYTHON_TASK_KILL_TIMEOUT) setDaemon(true) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala index 1f2f503a28d4..09e219fef5a1 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala @@ -28,6 +28,7 @@ import scala.collection.mutable import org.apache.spark._ import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.Python._ import org.apache.spark.security.SocketAuthHelper import org.apache.spark.util.{RedirectThread, Utils} @@ -41,7 +42,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String // currently only works on UNIX-based systems now because it uses signals for child management, // so we can also fall back to launching workers, pyspark/worker.py (by default) directly. 
private val useDaemon = { - val useDaemonEnabled = SparkEnv.get.conf.getBoolean("spark.python.use.daemon", true) + val useDaemonEnabled = SparkEnv.get.conf.get(PYTHON_USE_DAEMON) // This flag is ignored on Windows as it's unable to fork. !System.getProperty("os.name").startsWith("Windows") && useDaemonEnabled @@ -53,21 +54,21 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String // This configuration indicates the module to run the daemon to execute its Python workers. private val daemonModule = - SparkEnv.get.conf.getOption("spark.python.daemon.module").map { value => + SparkEnv.get.conf.get(PYTHON_DAEMON_MODULE).map { value => logInfo( - s"Python daemon module in PySpark is set to [$value] in 'spark.python.daemon.module', " + + s"Python daemon module in PySpark is set to [$value] in '${PYTHON_DAEMON_MODULE.key}', " + "using this to start the daemon up. Note that this configuration only has an effect when " + - "'spark.python.use.daemon' is enabled and the platform is not Windows.") + s"'${PYTHON_USE_DAEMON.key}' is enabled and the platform is not Windows.") value }.getOrElse("pyspark.daemon") // This configuration indicates the module to run each Python worker. private val workerModule = - SparkEnv.get.conf.getOption("spark.python.worker.module").map { value => + SparkEnv.get.conf.get(PYTHON_WORKER_MODULE).map { value => logInfo( - s"Python worker module in PySpark is set to [$value] in 'spark.python.worker.module', " + + s"Python worker module in PySpark is set to [$value] in '${PYTHON_WORKER_MODULE.key}', " + "using this to start the worker up. Note that this configuration only has an effect when " + - "'spark.python.use.daemon' is disabled or the platform is Windows.") + s"'${PYTHON_USE_DAEMON.key}' is disabled or the platform is Windows.") value }.getOrElse("pyspark.worker") diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackend.scala b/core/src/main/scala/org/apache/spark/api/r/RBackend.scala index 50c8fdf5316d..36b4132088b5 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RBackend.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RBackend.scala @@ -32,6 +32,7 @@ import io.netty.handler.timeout.ReadTimeoutHandler import org.apache.spark.SparkConf import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.R._ /** * Netty-based backend server that is used to communicate between R and Java. @@ -47,10 +48,8 @@ private[spark] class RBackend { def init(): (Int, RAuthHelper) = { val conf = new SparkConf() - val backendConnectionTimeout = conf.getInt( - "spark.r.backendConnectionTimeout", SparkRDefaults.DEFAULT_CONNECTION_TIMEOUT) - bossGroup = new NioEventLoopGroup( - conf.getInt("spark.r.numRBackendThreads", SparkRDefaults.DEFAULT_NUM_RBACKEND_THREADS)) + val backendConnectionTimeout = conf.get(R_BACKEND_CONNECTION_TIMEOUT) + bossGroup = new NioEventLoopGroup(conf.get(R_NUM_BACKEND_THREADS)) val workerGroup = bossGroup val handler = new RBackendHandler(this) val authHelper = new RAuthHelper(conf) @@ -126,8 +125,7 @@ private[spark] object RBackend extends Logging { // Connection timeout is set by socket client. 
To make it configurable we will pass the // timeout value to client inside the temp file val conf = new SparkConf() - val backendConnectionTimeout = conf.getInt( - "spark.r.backendConnectionTimeout", SparkRDefaults.DEFAULT_CONNECTION_TIMEOUT) + val backendConnectionTimeout = conf.get(R_BACKEND_CONNECTION_TIMEOUT) // tell the R process via temporary file val path = args(0) diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala b/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala index 18fc595301f4..7b74efa41044 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala @@ -29,6 +29,7 @@ import io.netty.handler.timeout.ReadTimeoutException import org.apache.spark.SparkConf import org.apache.spark.api.r.SerDe._ import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.R._ import org.apache.spark.util.{ThreadUtils, Utils} /** @@ -98,10 +99,8 @@ private[r] class RBackendHandler(server: RBackend) } } val conf = new SparkConf() - val heartBeatInterval = conf.getInt( - "spark.r.heartBeatInterval", SparkRDefaults.DEFAULT_HEARTBEAT_INTERVAL) - val backendConnectionTimeout = conf.getInt( - "spark.r.backendConnectionTimeout", SparkRDefaults.DEFAULT_CONNECTION_TIMEOUT) + val heartBeatInterval = conf.get(R_HEARTBEAT_INTERVAL) + val backendConnectionTimeout = conf.get(R_BACKEND_CONNECTION_TIMEOUT) val interval = Math.min(heartBeatInterval, backendConnectionTimeout - 1) execService.scheduleAtFixedRate(pingRunner, interval, interval, TimeUnit.SECONDS) diff --git a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala index e7fdc3963945..3fdea04cdf7a 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala @@ -27,6 +27,7 @@ import scala.util.Try import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.R._ import org.apache.spark.util.Utils /** @@ -340,11 +341,10 @@ private[r] object RRunner { // "spark.sparkr.r.command" is deprecated and replaced by "spark.r.command", // but kept here for backward compatibility. 
val sparkConf = SparkEnv.get.conf - var rCommand = sparkConf.get("spark.sparkr.r.command", "Rscript") - rCommand = sparkConf.get("spark.r.command", rCommand) + var rCommand = sparkConf.get(SPARKR_COMMAND) + rCommand = sparkConf.get(R_COMMAND).orElse(Some(rCommand)).get - val rConnectionTimeout = sparkConf.getInt( - "spark.r.backendConnectionTimeout", SparkRDefaults.DEFAULT_CONNECTION_TIMEOUT) + val rConnectionTimeout = sparkConf.get(R_BACKEND_CONNECTION_TIMEOUT) val rOptions = "--vanilla" val rLibDir = RUtils.sparkRPackagePath(isDriver = false) val rExecScript = rLibDir(0) + "/SparkR/worker/" + script diff --git a/core/src/main/scala/org/apache/spark/deploy/RRunner.scala b/core/src/main/scala/org/apache/spark/deploy/RRunner.scala index e86b362639e5..6284e6a6448f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/RRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/RRunner.scala @@ -25,7 +25,8 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path import org.apache.spark.{SparkException, SparkUserAppException} -import org.apache.spark.api.r.{RBackend, RUtils, SparkRDefaults} +import org.apache.spark.api.r.{RBackend, RUtils} +import org.apache.spark.internal.config.R._ import org.apache.spark.util.RedirectThread /** @@ -43,8 +44,8 @@ object RRunner { val rCommand = { // "spark.sparkr.r.command" is deprecated and replaced by "spark.r.command", // but kept here for backward compatibility. - var cmd = sys.props.getOrElse("spark.sparkr.r.command", "Rscript") - cmd = sys.props.getOrElse("spark.r.command", cmd) + var cmd = sys.props.getOrElse(SPARKR_COMMAND.key, SPARKR_COMMAND.defaultValue.get) + cmd = sys.props.getOrElse(R_COMMAND.key, cmd) if (sys.props.getOrElse("spark.submit.deployMode", "client") == "client") { cmd = sys.props.getOrElse("spark.r.driver.command", cmd) } @@ -53,7 +54,7 @@ object RRunner { // Connection timeout set by R process on its connection to RBackend in seconds. val backendConnectionTimeout = sys.props.getOrElse( - "spark.r.backendConnectionTimeout", SparkRDefaults.DEFAULT_CONNECTION_TIMEOUT.toString) + R_BACKEND_CONNECTION_TIMEOUT.key, R_BACKEND_CONNECTION_TIMEOUT.defaultValue.get.toString) // Check if the file path exists. // If not, change directory to current working directory for YARN cluster mode diff --git a/core/src/main/scala/org/apache/spark/internal/config/Python.scala b/core/src/main/scala/org/apache/spark/internal/config/Python.scala new file mode 100644 index 000000000000..26a0598f4941 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/internal/config/Python.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.internal.config + +import java.util.concurrent.TimeUnit + +import org.apache.spark.network.util.ByteUnit + +private[spark] object Python { + val PYTHON_WORKER_REUSE = ConfigBuilder("spark.python.worker.reuse") + .booleanConf + .createWithDefault(true) + + val PYTHON_TASK_KILL_TIMEOUT = ConfigBuilder("spark.python.task.killTimeout") + .timeConf(TimeUnit.MILLISECONDS) + .createWithDefaultString("2s") + + val PYTHON_USE_DAEMON = ConfigBuilder("spark.python.use.daemon") + .booleanConf + .createWithDefault(true) + + val PYTHON_DAEMON_MODULE = ConfigBuilder("spark.python.daemon.module") + .stringConf + .createOptional + + val PYTHON_WORKER_MODULE = ConfigBuilder("spark.python.worker.module") + .stringConf + .createOptional + + val PYSPARK_EXECUTOR_MEMORY = ConfigBuilder("spark.executor.pyspark.memory") + .bytesConf(ByteUnit.MiB) + .createOptional +} diff --git a/core/src/main/scala/org/apache/spark/api/r/SparkRDefaults.scala b/core/src/main/scala/org/apache/spark/internal/config/R.scala similarity index 56% rename from core/src/main/scala/org/apache/spark/api/r/SparkRDefaults.scala rename to core/src/main/scala/org/apache/spark/internal/config/R.scala index af67cbbce4e5..26e06a5231c4 100644 --- a/core/src/main/scala/org/apache/spark/api/r/SparkRDefaults.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/R.scala @@ -14,17 +14,27 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.spark.internal.config -package org.apache.spark.api.r +private[spark] object R { -private[spark] object SparkRDefaults { + val R_BACKEND_CONNECTION_TIMEOUT = ConfigBuilder("spark.r.backendConnectionTimeout") + .intConf + .createWithDefault(6000) - // Default value for spark.r.backendConnectionTimeout config - val DEFAULT_CONNECTION_TIMEOUT: Int = 6000 + val R_NUM_BACKEND_THREADS = ConfigBuilder("spark.r.numRBackendThreads") + .intConf + .createWithDefault(2) - // Default value for spark.r.heartBeatInterval config - val DEFAULT_HEARTBEAT_INTERVAL: Int = 100 + val R_HEARTBEAT_INTERVAL = ConfigBuilder("spark.r.heartBeatInterval") + .intConf + .createWithDefault(100) - // Default value for spark.r.numRBackendThreads config - val DEFAULT_NUM_RBACKEND_THREADS = 2 + val SPARKR_COMMAND = ConfigBuilder("spark.sparkr.r.command") + .stringConf + .createWithDefault("Rscript") + + val R_COMMAND = ConfigBuilder("spark.r.command") + .stringConf + .createOptional } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index d8e9c099028f..da8060459477 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -166,10 +166,6 @@ package object config { .checkValue(_ >= 0, "The off-heap memory size must not be negative") .createWithDefault(0) - private[spark] val PYSPARK_EXECUTOR_MEMORY = ConfigBuilder("spark.executor.pyspark.memory") - .bytesConf(ByteUnit.MiB) - .createOptional - private[spark] val IS_PYTHON_APP = ConfigBuilder("spark.yarn.isPython").internal() .booleanConf.createWithDefault(false) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index c8bf7cdb4224..dd73a5e52281 100644 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -25,6 +25,7 @@ import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Python._ import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index c2efab01e424..e28c650a571e 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesTestConf, import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Python._ import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 184fb6a8ad13..44a60b835f12 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -53,6 +53,7 @@ import org.apache.spark.deploy.yarn.config._ import org.apache.spark.deploy.yarn.security.YARNHadoopDelegationTokenManager import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Python._ import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle, YarnCommandBuilderUtils} import org.apache.spark.util.{CallerContext, Utils} diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index a3feca5dfd22..8c6eff991513 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -36,6 +36,7 @@ import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Python._ import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef} import org.apache.spark.scheduler.{ExecutorExited, ExecutorLossReason} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.RemoveExecutor From f65dc9593ee4b84343fea04fdcace14096788be8 Mon Sep 17 00:00:00 2001 From: "Liu,Linhong" Date: Fri, 4 Jan 2019 10:51:33 +0800 Subject: [PATCH 0224/1072] [SPARK-26526][SQL][TEST] Fix invalid test case about non-deterministic expression 
## What changes were proposed in this pull request?

The test case from SPARK-10316 is meant to ensure that a non-deterministic `Filter` won't be pushed through a `Project`. In the current code base, however, the test no longer covers that scenario. Changing the LogicalRDD to a HadoopFsRelation fixes this issue.

## How was this patch tested?

The modified test passes.

Closes #23440 from LinhongLiu/fix-test.

Authored-by: Liu,Linhong
Signed-off-by: Wenchen Fan
---
 .../scala/org/apache/spark/sql/DataFrameSuite.scala | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index b51c51e66350..3082e0bb97df 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -1398,11 +1398,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
   }
 
   test("SPARK-10316: respect non-deterministic expressions in PhysicalOperation") {
-    val input = spark.read.json((1 to 10).map(i => s"""{"id": $i}""").toDS())
+    withTempDir { dir =>
+      (1 to 10).toDF("id").write.mode(SaveMode.Overwrite).json(dir.getCanonicalPath)
+      val input = spark.read.json(dir.getCanonicalPath)
 
-    val df = input.select($"id", rand(0).as('r))
-    df.as("a").join(df.filter($"r" < 0.5).as("b"), $"a.id" === $"b.id").collect().foreach { row =>
-      assert(row.getDouble(1) - row.getDouble(3) === 0.0 +- 0.001)
+      val df = input.select($"id", rand(0).as('r))
+      df.as("a").join(df.filter($"r" < 0.5).as("b"), $"a.id" === $"b.id").collect().foreach { row =>
+        assert(row.getDouble(1) - row.getDouble(3) === 0.0 +- 0.001)
+      }
     }
   }

From 27e42c1de502da80fa3e22bb69de47fb00158174 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun
Date: Thu, 3 Jan 2019 20:01:19 -0800
Subject: [PATCH 0225/1072] [MINOR][NETWORK][TEST] Fix TransportFrameDecoderSuite to use ByteBuf instead of ByteBuffer

## What changes were proposed in this pull request?

`fireChannelRead` expects `io.netty.buffer.ByteBuf`. I checked that this is the only place in the `network` module that misuses `java.nio.ByteBuffer`.

## How was this patch tested?

Pass the Jenkins with the existing tests.

Closes #23442 from dongjoon-hyun/SPARK-NETWORK-COMMON.
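The type mismatch is easy to miss in review, so here is a minimal sketch of why the matcher type matters. It assumes Netty and Mockito on the test classpath, is written in Scala rather than the Java of the actual suite, and `ByteBufMatcherSketch` is an illustrative name, not code from this patch.

```scala
import io.netty.buffer.{ByteBuf, Unpooled}
import io.netty.channel.ChannelHandlerContext
import org.mockito.ArgumentMatchers.any
import org.mockito.Mockito.{mock, verify}

// The decoder forwards an io.netty.buffer.ByteBuf to the channel, so the Mockito
// verification must match on ByteBuf; a java.nio.ByteBuffer matcher never matches.
object ByteBufMatcherSketch {
  def main(args: Array[String]): Unit = {
    val ctx = mock(classOf[ChannelHandlerContext])
    val frame: ByteBuf = Unpooled.wrappedBuffer(Array[Byte](1, 2, 3))
    ctx.fireChannelRead(frame)                          // stand-in for what the decoder emits
    verify(ctx).fireChannelRead(any(classOf[ByteBuf]))  // passes; a ByteBuffer matcher would fail
  }
}
```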
Authored-by: Dongjoon Hyun
Signed-off-by: Dongjoon Hyun
---
 .../apache/spark/network/util/TransportFrameDecoderSuite.java | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java
index b53e41303751..7d40387c5f1a 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java
@@ -17,7 +17,6 @@
 
 package org.apache.spark.network.util;
 
-import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
@@ -69,7 +68,7 @@ public void testInterception() throws Exception {
       decoder.channelRead(ctx, len);
       decoder.channelRead(ctx, dataBuf);
       verify(interceptor, times(interceptedReads)).handle(any(ByteBuf.class));
-      verify(ctx).fireChannelRead(any(ByteBuffer.class));
+      verify(ctx).fireChannelRead(any(ByteBuf.class));
       assertEquals(0, len.refCnt());
       assertEquals(0, dataBuf.refCnt());
     } finally {

From 4419e1daca6c5de373d5f3f13c417b791d768c96 Mon Sep 17 00:00:00 2001
From: Takuya UESHIN
Date: Fri, 4 Jan 2019 22:12:35 +0800
Subject: [PATCH 0226/1072] [SPARK-26445][CORE] Use ConfigEntry for hardcoded configs for driver/executor categories.

## What changes were proposed in this pull request?

The PR migrates the hardcoded spark.driver, spark.executor, and spark.cores.max configs to `ConfigEntry`.

Note that some config keys come from `SparkLauncher` rather than being defined in the config package object, because the strings are already defined there and the launcher module does not depend on the core module.

## How was this patch tested?

Existing tests.

Closes #23415 from ueshin/issues/SPARK-26445/hardcoded_driver_executor_configs.
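One behavioural detail worth noting, sketched below under the same assumptions as the earlier sketch (Spark's `private[spark]` `ConfigBuilder`; the object name is illustrative): entries created with `createOptional` read back as `Option` values, which is why call sites such as `StandaloneSchedulerBackend` change from `getOption("spark.cores.max").map(_.toInt)` to a plain `conf.get(CORES_MAX)`.

```scala
package org.apache.spark.internal.config

import org.apache.spark.SparkConf

// Illustrative sketch: an optional entry has no default, so typed reads return Option[Int].
private[spark] object OptionalEntrySketch {
  val CORES_MAX = ConfigBuilder("spark.cores.max")
    .intConf
    .createOptional

  def maxCores(conf: SparkConf): Int = {
    // Equivalent to the old conf.getOption("spark.cores.max").map(_.toInt).getOrElse(0)
    conf.get(CORES_MAX).getOrElse(0)
  }
}
```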
Authored-by: Takuya UESHIN Signed-off-by: Hyukjin Kwon --- .../spark/ExecutorAllocationManager.scala | 4 +- .../scala/org/apache/spark/SparkConf.scala | 25 ++++----- .../scala/org/apache/spark/SparkContext.scala | 8 +-- .../scala/org/apache/spark/SparkEnv.scala | 8 +-- .../spark/api/python/PythonRunner.scala | 4 +- .../org/apache/spark/deploy/Client.scala | 8 +-- .../spark/deploy/FaultToleranceTest.scala | 6 +- .../org/apache/spark/deploy/SparkSubmit.scala | 24 ++++---- .../spark/deploy/SparkSubmitArguments.scala | 20 +++---- .../deploy/rest/StandaloneRestServer.scala | 13 +++-- .../rest/SubmitRestProtocolRequest.scala | 11 ++-- .../spark/deploy/worker/DriverWrapper.scala | 6 +- .../spark/internal/config/package.scala | 55 ++++++++++++++++++- .../spark/memory/StaticMemoryManager.scala | 9 +-- .../spark/memory/UnifiedMemoryManager.scala | 9 +-- .../apache/spark/metrics/MetricsSystem.scala | 2 +- .../spark/scheduler/TaskSetManager.scala | 2 +- .../cluster/StandaloneSchedulerBackend.scala | 16 +++--- .../local/LocalSchedulerBackend.scala | 4 +- .../org/apache/spark/util/RpcUtils.scala | 5 +- .../scala/org/apache/spark/util/Utils.scala | 2 +- .../spark/util/logging/FileAppender.scala | 10 ++-- .../util/logging/RollingFileAppender.scala | 19 ++----- .../ExecutorAllocationManagerSuite.scala | 2 +- .../org/apache/spark/SparkConfSuite.scala | 2 +- .../StandaloneDynamicAllocationSuite.scala | 6 +- .../memory/UnifiedMemoryManagerSuite.scala | 2 +- .../spark/scheduler/TaskSetManagerSuite.scala | 2 +- .../BlockManagerReplicationSuite.scala | 4 +- .../spark/storage/BlockManagerSuite.scala | 2 +- .../apache/spark/util/FileAppenderSuite.scala | 13 ++--- .../k8s/features/BasicDriverFeatureStep.scala | 4 +- .../features/BasicExecutorFeatureStep.scala | 6 +- .../features/DriverServiceFeatureStep.scala | 15 +++-- .../BasicDriverFeatureStepSuite.scala | 2 +- .../BasicExecutorFeatureStepSuite.scala | 16 +++--- .../DriverServiceFeatureStepSuite.scala | 6 +- .../apache/spark/deploy/mesos/config.scala | 3 + .../deploy/rest/mesos/MesosRestServer.scala | 13 +++-- .../cluster/mesos/MesosClusterScheduler.scala | 10 ++-- .../MesosCoarseGrainedSchedulerBackend.scala | 20 +++---- .../MesosFineGrainedSchedulerBackend.scala | 12 ++-- .../spark/deploy/yarn/ApplicationMaster.scala | 6 +- .../org/apache/spark/deploy/yarn/config.scala | 10 +--- .../cluster/YarnClientSchedulerBackend.scala | 8 +-- .../yarn/ResourceRequestHelperSuite.scala | 2 +- 46 files changed, 236 insertions(+), 200 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 3f0b71bbe17f..d966582295b3 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -127,7 +127,7 @@ private[spark] class ExecutorAllocationManager( // allocation is only supported for YARN and the default number of cores per executor in YARN is // 1, but it might need to be attained differently for different cluster managers private val tasksPerExecutorForFullParallelism = - conf.getInt("spark.executor.cores", 1) / conf.getInt("spark.task.cpus", 1) + conf.get(EXECUTOR_CORES) / conf.getInt("spark.task.cpus", 1) private val executorAllocationRatio = conf.get(DYN_ALLOCATION_EXECUTOR_ALLOCATION_RATIO) @@ -223,7 +223,7 @@ private[spark] class ExecutorAllocationManager( "shuffle service. 
You may enable this through spark.shuffle.service.enabled.") } if (tasksPerExecutorForFullParallelism == 0) { - throw new SparkException("spark.executor.cores must not be < spark.task.cpus.") + throw new SparkException(s"${EXECUTOR_CORES.key} must not be < spark.task.cpus.") } if (executorAllocationRatio > 1.0 || executorAllocationRatio <= 0.0) { diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 0b47da12b5b4..681e4378a4dd 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -503,12 +503,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria logWarning(msg) } - val executorOptsKey = "spark.executor.extraJavaOptions" - val executorClasspathKey = "spark.executor.extraClassPath" - val driverOptsKey = "spark.driver.extraJavaOptions" - val driverClassPathKey = "spark.driver.extraClassPath" - val driverLibraryPathKey = "spark.driver.extraLibraryPath" - val sparkExecutorInstances = "spark.executor.instances" + val executorOptsKey = EXECUTOR_JAVA_OPTIONS.key // Used by Yarn in 1.1 and before sys.props.get("spark.driver.libraryPath").foreach { value => @@ -517,7 +512,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria |spark.driver.libraryPath was detected (set to '$value'). |This is deprecated in Spark 1.2+. | - |Please instead use: $driverLibraryPathKey + |Please instead use: ${DRIVER_LIBRARY_PATH.key} """.stripMargin logWarning(warning) } @@ -594,9 +589,9 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria } } - if (contains("spark.cores.max") && contains("spark.executor.cores")) { - val totalCores = getInt("spark.cores.max", 1) - val executorCores = getInt("spark.executor.cores", 1) + if (contains(CORES_MAX) && contains(EXECUTOR_CORES)) { + val totalCores = getInt(CORES_MAX.key, 1) + val executorCores = get(EXECUTOR_CORES) val leftCores = totalCores % executorCores if (leftCores != 0) { logWarning(s"Total executor cores: ${totalCores} is not " + @@ -605,12 +600,12 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria } } - if (contains("spark.executor.cores") && contains("spark.task.cpus")) { - val executorCores = getInt("spark.executor.cores", 1) + if (contains(EXECUTOR_CORES) && contains("spark.task.cpus")) { + val executorCores = get(EXECUTOR_CORES) val taskCpus = getInt("spark.task.cpus", 1) if (executorCores < taskCpus) { - throw new SparkException("spark.executor.cores must not be less than spark.task.cpus.") + throw new SparkException(s"${EXECUTOR_CORES.key} must not be less than spark.task.cpus.") } } @@ -680,7 +675,7 @@ private[spark] object SparkConf extends Logging { * TODO: consolidate it with `ConfigBuilder.withAlternative`. 
*/ private val configsWithAlternatives = Map[String, Seq[AlternateConfig]]( - "spark.executor.userClassPathFirst" -> Seq( + EXECUTOR_USER_CLASS_PATH_FIRST.key -> Seq( AlternateConfig("spark.files.userClassPathFirst", "1.3")), UPDATE_INTERVAL_S.key -> Seq( AlternateConfig("spark.history.fs.update.interval.seconds", "1.4"), @@ -703,7 +698,7 @@ private[spark] object SparkConf extends Logging { AlternateConfig("spark.kryoserializer.buffer.max.mb", "1.4")), "spark.shuffle.file.buffer" -> Seq( AlternateConfig("spark.shuffle.file.buffer.kb", "1.4")), - "spark.executor.logs.rolling.maxSize" -> Seq( + EXECUTOR_LOGS_ROLLING_MAX_SIZE.key -> Seq( AlternateConfig("spark.executor.logs.rolling.size.maxBytes", "1.4")), "spark.io.compression.snappy.blockSize" -> Seq( AlternateConfig("spark.io.compression.snappy.block.size", "1.4")), diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 3475859c3ed6..89be9de08307 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -386,9 +386,9 @@ class SparkContext(config: SparkConf) extends Logging { // Set Spark driver host and port system properties. This explicitly sets the configuration // instead of relying on the default value of the config constant. _conf.set(DRIVER_HOST_ADDRESS, _conf.get(DRIVER_HOST_ADDRESS)) - _conf.setIfMissing("spark.driver.port", "0") + _conf.setIfMissing(DRIVER_PORT, 0) - _conf.set("spark.executor.id", SparkContext.DRIVER_IDENTIFIER) + _conf.set(EXECUTOR_ID, SparkContext.DRIVER_IDENTIFIER) _jars = Utils.getUserJars(_conf) _files = _conf.getOption("spark.files").map(_.split(",")).map(_.filter(_.nonEmpty)) @@ -461,7 +461,7 @@ class SparkContext(config: SparkConf) extends Logging { files.foreach(addFile) } - _executorMemory = _conf.getOption("spark.executor.memory") + _executorMemory = _conf.getOption(EXECUTOR_MEMORY.key) .orElse(Option(System.getenv("SPARK_EXECUTOR_MEMORY"))) .orElse(Option(System.getenv("SPARK_MEM")) .map(warnSparkMem)) @@ -2639,7 +2639,7 @@ object SparkContext extends Logging { case SparkMasterRegex.LOCAL_N_FAILURES_REGEX(threads, _) => convertToInt(threads) case "yarn" => if (conf != null && conf.getOption("spark.submit.deployMode").contains("cluster")) { - conf.getInt("spark.driver.cores", 0) + conf.getInt(DRIVER_CORES.key, 0) } else { 0 } diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index de0c8579d9ac..9222781fa083 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -163,10 +163,10 @@ object SparkEnv extends Logging { mockOutputCommitCoordinator: Option[OutputCommitCoordinator] = None): SparkEnv = { assert(conf.contains(DRIVER_HOST_ADDRESS), s"${DRIVER_HOST_ADDRESS.key} is not set on the driver!") - assert(conf.contains("spark.driver.port"), "spark.driver.port is not set on the driver!") + assert(conf.contains(DRIVER_PORT), s"${DRIVER_PORT.key} is not set on the driver!") val bindAddress = conf.get(DRIVER_BIND_ADDRESS) val advertiseAddress = conf.get(DRIVER_HOST_ADDRESS) - val port = conf.get("spark.driver.port").toInt + val port = conf.get(DRIVER_PORT) val ioEncryptionKey = if (conf.get(IO_ENCRYPTION_ENABLED)) { Some(CryptoStreamUtils.createKey(conf)) } else { @@ -251,7 +251,7 @@ object SparkEnv extends Logging { // Figure out which port RpcEnv actually bound to in case the original port is 0 or occupied. 
if (isDriver) { - conf.set("spark.driver.port", rpcEnv.address.port.toString) + conf.set(DRIVER_PORT, rpcEnv.address.port) } // Create an instance of the class with the given name, possibly initializing it with our conf @@ -359,7 +359,7 @@ object SparkEnv extends Logging { // We need to set the executor ID before the MetricsSystem is created because sources and // sinks specified in the metrics configuration file will want to incorporate this executor's // ID into the metrics they report. - conf.set("spark.executor.id", executorId) + conf.set(EXECUTOR_ID, executorId) val ms = MetricsSystem.createMetricsSystem("executor", conf, securityManager) ms.start() ms diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index 6b748c825d29..5168e9330965 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -27,6 +27,7 @@ import scala.collection.JavaConverters._ import org.apache.spark._ import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.EXECUTOR_CORES import org.apache.spark.internal.config.Python._ import org.apache.spark.security.SocketAuthHelper import org.apache.spark.util._ @@ -74,8 +75,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( private val reuseWorker = conf.get(PYTHON_WORKER_REUSE) // each python worker gets an equal part of the allocation. the worker pool will grow to the // number of concurrent tasks, which is determined by the number of cores in this executor. - private val memoryMb = conf.get(PYSPARK_EXECUTOR_MEMORY) - .map(_ / conf.getInt("spark.executor.cores", 1)) + private val memoryMb = conf.get(PYSPARK_EXECUTOR_MEMORY).map(_ / conf.get(EXECUTOR_CORES)) // All the Python functions should have the same exec, version and envvars. protected val envVars = funcs.head.funcs.head.envVars diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala index d5145094ec07..d94b174d8d86 100644 --- a/core/src/main/scala/org/apache/spark/deploy/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala @@ -27,7 +27,7 @@ import org.apache.log4j.Logger import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.DeployMessages._ import org.apache.spark.deploy.master.{DriverState, Master} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.util.{SparkExitCode, ThreadUtils, Utils} @@ -68,17 +68,17 @@ private class ClientEndpoint( // people call `addJar` assuming the jar is in the same directory. 
val mainClass = "org.apache.spark.deploy.worker.DriverWrapper" - val classPathConf = "spark.driver.extraClassPath" + val classPathConf = config.DRIVER_CLASS_PATH.key val classPathEntries = sys.props.get(classPathConf).toSeq.flatMap { cp => cp.split(java.io.File.pathSeparator) } - val libraryPathConf = "spark.driver.extraLibraryPath" + val libraryPathConf = config.DRIVER_LIBRARY_PATH.key val libraryPathEntries = sys.props.get(libraryPathConf).toSeq.flatMap { cp => cp.split(java.io.File.pathSeparator) } - val extraJavaOptsConf = "spark.driver.extraJavaOptions" + val extraJavaOptsConf = config.DRIVER_JAVA_OPTIONS.key val extraJavaOpts = sys.props.get(extraJavaOptsConf) .map(Utils.splitCommandString).getOrElse(Seq.empty) val sparkJavaOpts = Utils.sparkJavaOpts(conf) diff --git a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala index c6307da61c7e..0679bdf7c707 100644 --- a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala +++ b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala @@ -34,7 +34,7 @@ import org.json4s.jackson.JsonMethods import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.deploy.master.RecoveryState -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.util.{ThreadUtils, Utils} /** @@ -77,7 +77,7 @@ private object FaultToleranceTest extends App with Logging { private val containerSparkHome = "/opt/spark" private val dockerMountDir = "%s:%s".format(sparkHome, containerSparkHome) - System.setProperty("spark.driver.host", "172.17.42.1") // default docker host ip + System.setProperty(config.DRIVER_HOST_ADDRESS.key, "172.17.42.1") // default docker host ip private def afterEach() { if (sc != null) { @@ -216,7 +216,7 @@ private object FaultToleranceTest extends App with Logging { if (sc != null) { sc.stop() } // Counter-hack: Because of a hack in SparkEnv#create() that changes this // property, we need to reset it. 
- System.setProperty("spark.driver.port", "0") + System.setProperty(config.DRIVER_PORT.key, "0") sc = new SparkContext(getMasterUrls(masters), "fault-tolerance", containerSparkHome) } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 763bd0a70a03..a4c65aeaae3f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -514,13 +514,13 @@ private[spark] class SparkSubmit extends Logging { OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, confKey = "spark.app.name"), OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, confKey = "spark.jars.ivy"), OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT, - confKey = "spark.driver.memory"), + confKey = DRIVER_MEMORY.key), OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, - confKey = "spark.driver.extraClassPath"), + confKey = DRIVER_CLASS_PATH.key), OptionAssigner(args.driverExtraJavaOptions, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, - confKey = "spark.driver.extraJavaOptions"), + confKey = DRIVER_JAVA_OPTIONS.key), OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, - confKey = "spark.driver.extraLibraryPath"), + confKey = DRIVER_LIBRARY_PATH.key), OptionAssigner(args.principal, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, confKey = PRINCIPAL.key), OptionAssigner(args.keytab, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, @@ -537,7 +537,7 @@ private[spark] class SparkSubmit extends Logging { // Yarn only OptionAssigner(args.queue, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.queue"), OptionAssigner(args.numExecutors, YARN, ALL_DEPLOY_MODES, - confKey = "spark.executor.instances"), + confKey = EXECUTOR_INSTANCES.key), OptionAssigner(args.pyFiles, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.dist.pyFiles"), OptionAssigner(args.jars, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.dist.jars"), OptionAssigner(args.files, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.dist.files"), @@ -545,22 +545,22 @@ private[spark] class SparkSubmit extends Logging { // Other options OptionAssigner(args.executorCores, STANDALONE | YARN | KUBERNETES, ALL_DEPLOY_MODES, - confKey = "spark.executor.cores"), + confKey = EXECUTOR_CORES.key), OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN | KUBERNETES, ALL_DEPLOY_MODES, - confKey = "spark.executor.memory"), + confKey = EXECUTOR_MEMORY.key), OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, - confKey = "spark.cores.max"), + confKey = CORES_MAX.key), OptionAssigner(args.files, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, confKey = "spark.files"), OptionAssigner(args.jars, LOCAL, CLIENT, confKey = "spark.jars"), OptionAssigner(args.jars, STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, confKey = "spark.jars"), OptionAssigner(args.driverMemory, STANDALONE | MESOS | YARN | KUBERNETES, CLUSTER, - confKey = "spark.driver.memory"), + confKey = DRIVER_MEMORY.key), OptionAssigner(args.driverCores, STANDALONE | MESOS | YARN | KUBERNETES, CLUSTER, - confKey = "spark.driver.cores"), + confKey = DRIVER_CORES.key), OptionAssigner(args.supervise.toString, STANDALONE | MESOS, CLUSTER, - confKey = "spark.driver.supervise"), + confKey = DRIVER_SUPERVISE.key), OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, confKey = "spark.jars.ivy"), // An internal option used only for spark-shell to add user jars to repl's classloader, @@ -727,7 
+727,7 @@ private[spark] class SparkSubmit extends Logging { // Ignore invalid spark.driver.host in cluster modes. if (deployMode == CLUSTER) { - sparkConf.remove("spark.driver.host") + sparkConf.remove(DRIVER_HOST_ADDRESS) } // Resolve paths in certain spark properties diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 4cf08a7980f5..34facd5a58c4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -31,7 +31,7 @@ import scala.util.Try import org.apache.spark.{SparkException, SparkUserAppException} import org.apache.spark.deploy.SparkSubmitAction._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.launcher.SparkSubmitArgumentsParser import org.apache.spark.network.util.JavaUtils import org.apache.spark.util.Utils @@ -155,31 +155,31 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S .orElse(env.get("MASTER")) .orNull driverExtraClassPath = Option(driverExtraClassPath) - .orElse(sparkProperties.get("spark.driver.extraClassPath")) + .orElse(sparkProperties.get(config.DRIVER_CLASS_PATH.key)) .orNull driverExtraJavaOptions = Option(driverExtraJavaOptions) - .orElse(sparkProperties.get("spark.driver.extraJavaOptions")) + .orElse(sparkProperties.get(config.DRIVER_JAVA_OPTIONS.key)) .orNull driverExtraLibraryPath = Option(driverExtraLibraryPath) - .orElse(sparkProperties.get("spark.driver.extraLibraryPath")) + .orElse(sparkProperties.get(config.DRIVER_LIBRARY_PATH.key)) .orNull driverMemory = Option(driverMemory) - .orElse(sparkProperties.get("spark.driver.memory")) + .orElse(sparkProperties.get(config.DRIVER_MEMORY.key)) .orElse(env.get("SPARK_DRIVER_MEMORY")) .orNull driverCores = Option(driverCores) - .orElse(sparkProperties.get("spark.driver.cores")) + .orElse(sparkProperties.get(config.DRIVER_CORES.key)) .orNull executorMemory = Option(executorMemory) - .orElse(sparkProperties.get("spark.executor.memory")) + .orElse(sparkProperties.get(config.EXECUTOR_MEMORY.key)) .orElse(env.get("SPARK_EXECUTOR_MEMORY")) .orNull executorCores = Option(executorCores) - .orElse(sparkProperties.get("spark.executor.cores")) + .orElse(sparkProperties.get(config.EXECUTOR_CORES.key)) .orElse(env.get("SPARK_EXECUTOR_CORES")) .orNull totalExecutorCores = Option(totalExecutorCores) - .orElse(sparkProperties.get("spark.cores.max")) + .orElse(sparkProperties.get(config.CORES_MAX.key)) .orNull name = Option(name).orElse(sparkProperties.get("spark.app.name")).orNull jars = Option(jars).orElse(sparkProperties.get("spark.jars")).orNull @@ -197,7 +197,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S .orElse(env.get("DEPLOY_MODE")) .orNull numExecutors = Option(numExecutors) - .getOrElse(sparkProperties.get("spark.executor.instances").orNull) + .getOrElse(sparkProperties.get(config.EXECUTOR_INSTANCES.key).orNull) queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull keytab = Option(keytab) .orElse(sparkProperties.get("spark.kerberos.keytab")) diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala index afa1a5fbba79..c75e684df226 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala +++ 
b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala @@ -23,6 +23,7 @@ import javax.servlet.http.HttpServletResponse import org.apache.spark.{SPARK_VERSION => sparkVersion, SparkConf} import org.apache.spark.deploy.{Command, DeployMessages, DriverDescription} import org.apache.spark.deploy.ClientArguments._ +import org.apache.spark.internal.config import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.util.Utils @@ -132,12 +133,12 @@ private[rest] class StandaloneSubmitRequestServlet( // Optional fields val sparkProperties = request.sparkProperties - val driverMemory = sparkProperties.get("spark.driver.memory") - val driverCores = sparkProperties.get("spark.driver.cores") - val driverExtraJavaOptions = sparkProperties.get("spark.driver.extraJavaOptions") - val driverExtraClassPath = sparkProperties.get("spark.driver.extraClassPath") - val driverExtraLibraryPath = sparkProperties.get("spark.driver.extraLibraryPath") - val superviseDriver = sparkProperties.get("spark.driver.supervise") + val driverMemory = sparkProperties.get(config.DRIVER_MEMORY.key) + val driverCores = sparkProperties.get(config.DRIVER_CORES.key) + val driverExtraJavaOptions = sparkProperties.get(config.DRIVER_JAVA_OPTIONS.key) + val driverExtraClassPath = sparkProperties.get(config.DRIVER_CLASS_PATH.key) + val driverExtraLibraryPath = sparkProperties.get(config.DRIVER_LIBRARY_PATH.key) + val superviseDriver = sparkProperties.get(config.DRIVER_SUPERVISE.key) // The semantics of "spark.master" and the masterUrl are different. While the // property "spark.master" could contain all registered masters, masterUrl // contains only the active master. To make sure a Spark driver can recover diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolRequest.scala b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolRequest.scala index 86ddf954ca12..7f462148c71a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolRequest.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolRequest.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.rest import scala.util.Try +import org.apache.spark.internal.config import org.apache.spark.util.Utils /** @@ -49,11 +50,11 @@ private[rest] class CreateSubmissionRequest extends SubmitRestProtocolRequest { assertFieldIsSet(appArgs, "appArgs") assertFieldIsSet(environmentVariables, "environmentVariables") assertPropertyIsSet("spark.app.name") - assertPropertyIsBoolean("spark.driver.supervise") - assertPropertyIsNumeric("spark.driver.cores") - assertPropertyIsNumeric("spark.cores.max") - assertPropertyIsMemory("spark.driver.memory") - assertPropertyIsMemory("spark.executor.memory") + assertPropertyIsBoolean(config.DRIVER_SUPERVISE.key) + assertPropertyIsNumeric(config.DRIVER_CORES.key) + assertPropertyIsNumeric(config.CORES_MAX.key) + assertPropertyIsMemory(config.DRIVER_MEMORY.key) + assertPropertyIsMemory(config.EXECUTOR_MEMORY.key) } private def assertPropertyIsSet(key: String): Unit = diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala index 8d6a2b80ef5f..1e8ad0b6af6a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala @@ -23,7 +23,7 @@ import org.apache.commons.lang3.StringUtils import org.apache.spark.{SecurityManager, SparkConf} import 
org.apache.spark.deploy.{DependencyUtils, SparkHadoopUtil, SparkSubmit} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.rpc.RpcEnv import org.apache.spark.util._ @@ -43,7 +43,7 @@ object DriverWrapper extends Logging { case workerUrl :: userJar :: mainClass :: extraArgs => val conf = new SparkConf() val host: String = Utils.localHostName() - val port: Int = sys.props.getOrElse("spark.driver.port", "0").toInt + val port: Int = sys.props.getOrElse(config.DRIVER_PORT.key, "0").toInt val rpcEnv = RpcEnv.create("Driver", host, port, conf, new SecurityManager(conf)) logInfo(s"Driver address: ${rpcEnv.address}") rpcEnv.setupEndpoint("workerWatcher", new WorkerWatcher(rpcEnv, workerUrl)) @@ -51,7 +51,7 @@ object DriverWrapper extends Logging { val currentLoader = Thread.currentThread.getContextClassLoader val userJarUrl = new File(userJar).toURI().toURL() val loader = - if (sys.props.getOrElse("spark.driver.userClassPathFirst", "false").toBoolean) { + if (sys.props.getOrElse(config.DRIVER_USER_CLASS_PATH_FIRST.key, "false").toBoolean) { new ChildFirstURLClassLoader(Array(userJarUrl), currentLoader) } else { new MutableURLClassLoader(Array(userJarUrl), currentLoader) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index da8060459477..8caaa73b0227 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -39,7 +39,12 @@ package object config { private[spark] val DRIVER_USER_CLASS_PATH_FIRST = ConfigBuilder("spark.driver.userClassPathFirst").booleanConf.createWithDefault(false) - private[spark] val DRIVER_MEMORY = ConfigBuilder("spark.driver.memory") + private[spark] val DRIVER_CORES = ConfigBuilder("spark.driver.cores") + .doc("Number of cores to use for the driver process, only in cluster mode.") + .intConf + .createWithDefault(1) + + private[spark] val DRIVER_MEMORY = ConfigBuilder(SparkLauncher.DRIVER_MEMORY) .doc("Amount of memory to use for the driver process, in MiB unless otherwise specified.") .bytesConf(ByteUnit.MiB) .createWithDefaultString("1g") @@ -113,6 +118,9 @@ package object config { private[spark] val EVENT_LOG_CALLSITE_LONG_FORM = ConfigBuilder("spark.eventLog.longForm.enabled").booleanConf.createWithDefault(false) + private[spark] val EXECUTOR_ID = + ConfigBuilder("spark.executor.id").stringConf.createOptional + private[spark] val EXECUTOR_CLASS_PATH = ConfigBuilder(SparkLauncher.EXECUTOR_EXTRA_CLASSPATH).stringConf.createOptional @@ -139,7 +147,11 @@ package object config { private[spark] val EXECUTOR_USER_CLASS_PATH_FIRST = ConfigBuilder("spark.executor.userClassPathFirst").booleanConf.createWithDefault(false) - private[spark] val EXECUTOR_MEMORY = ConfigBuilder("spark.executor.memory") + private[spark] val EXECUTOR_CORES = ConfigBuilder(SparkLauncher.EXECUTOR_CORES) + .intConf + .createWithDefault(1) + + private[spark] val EXECUTOR_MEMORY = ConfigBuilder(SparkLauncher.EXECUTOR_MEMORY) .doc("Amount of memory to use per executor process, in MiB unless otherwise specified.") .bytesConf(ByteUnit.MiB) .createWithDefaultString("1g") @@ -150,6 +162,15 @@ package object config { .bytesConf(ByteUnit.MiB) .createOptional + private[spark] val CORES_MAX = ConfigBuilder("spark.cores.max") + .doc("When running on a standalone deploy cluster or a Mesos cluster in coarse-grained " + + "sharing mode, the maximum amount of CPU 
cores to request for the application from across " + + "the cluster (not from each machine). If not set, the default will be " + + "`spark.deploy.defaultCores` on Spark's standalone cluster manager, or infinite " + + "(all available cores) on Mesos.") + .intConf + .createOptional + private[spark] val MEMORY_OFFHEAP_ENABLED = ConfigBuilder("spark.memory.offHeap.enabled") .doc("If true, Spark will attempt to use off-heap memory for certain operations. " + "If off-heap memory use is enabled, then spark.memory.offHeap.size must be positive.") @@ -347,6 +368,17 @@ package object config { .stringConf .createWithDefault(Utils.localCanonicalHostName()) + private[spark] val DRIVER_PORT = ConfigBuilder("spark.driver.port") + .doc("Port of driver endpoints.") + .intConf + .createWithDefault(0) + + private[spark] val DRIVER_SUPERVISE = ConfigBuilder("spark.driver.supervise") + .doc("If true, restarts the driver automatically if it fails with a non-zero exit status. " + + "Only has effect in Spark standalone mode or Mesos cluster deploy mode.") + .booleanConf + .createWithDefault(false) + private[spark] val DRIVER_BIND_ADDRESS = ConfigBuilder("spark.driver.bindAddress") .doc("Address where to bind network listen sockets on the driver.") .fallbackConf(DRIVER_HOST_ADDRESS) @@ -729,4 +761,23 @@ package object config { .stringConf .toSequence .createWithDefault(Nil) + + private[spark] val EXECUTOR_LOGS_ROLLING_STRATEGY = + ConfigBuilder("spark.executor.logs.rolling.strategy").stringConf.createWithDefault("") + + private[spark] val EXECUTOR_LOGS_ROLLING_TIME_INTERVAL = + ConfigBuilder("spark.executor.logs.rolling.time.interval").stringConf.createWithDefault("daily") + + private[spark] val EXECUTOR_LOGS_ROLLING_MAX_SIZE = + ConfigBuilder("spark.executor.logs.rolling.maxSize") + .stringConf + .createWithDefault((1024 * 1024).toString) + + private[spark] val EXECUTOR_LOGS_ROLLING_MAX_RETAINED_FILES = + ConfigBuilder("spark.executor.logs.rolling.maxRetainedFiles").intConf.createWithDefault(-1) + + private[spark] val EXECUTOR_LOGS_ROLLING_ENABLE_COMPRESSION = + ConfigBuilder("spark.executor.logs.rolling.enableCompression") + .booleanConf + .createWithDefault(false) } diff --git a/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala index a6f7db0600e6..828608704274 100644 --- a/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala +++ b/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala @@ -18,6 +18,7 @@ package org.apache.spark.memory import org.apache.spark.SparkConf +import org.apache.spark.internal.config import org.apache.spark.storage.BlockId /** @@ -127,14 +128,14 @@ private[spark] object StaticMemoryManager { if (systemMaxMemory < MIN_MEMORY_BYTES) { throw new IllegalArgumentException(s"System memory $systemMaxMemory must " + s"be at least $MIN_MEMORY_BYTES. Please increase heap size using the --driver-memory " + - s"option or spark.driver.memory in Spark configuration.") + s"option or ${config.DRIVER_MEMORY.key} in Spark configuration.") } - if (conf.contains("spark.executor.memory")) { - val executorMemory = conf.getSizeAsBytes("spark.executor.memory") + if (conf.contains(config.EXECUTOR_MEMORY)) { + val executorMemory = conf.getSizeAsBytes(config.EXECUTOR_MEMORY.key) if (executorMemory < MIN_MEMORY_BYTES) { throw new IllegalArgumentException(s"Executor memory $executorMemory must be at least " + s"$MIN_MEMORY_BYTES. 
Please increase executor memory using the " + - s"--executor-memory option or spark.executor.memory in Spark configuration.") + s"--executor-memory option or ${config.EXECUTOR_MEMORY.key} in Spark configuration.") } } val memoryFraction = conf.getDouble("spark.shuffle.memoryFraction", 0.2) diff --git a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala index 78edd2c4d7fa..9260fd3a6fb3 100644 --- a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala +++ b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala @@ -18,6 +18,7 @@ package org.apache.spark.memory import org.apache.spark.SparkConf +import org.apache.spark.internal.config import org.apache.spark.storage.BlockId /** @@ -216,15 +217,15 @@ object UnifiedMemoryManager { if (systemMemory < minSystemMemory) { throw new IllegalArgumentException(s"System memory $systemMemory must " + s"be at least $minSystemMemory. Please increase heap size using the --driver-memory " + - s"option or spark.driver.memory in Spark configuration.") + s"option or ${config.DRIVER_MEMORY.key} in Spark configuration.") } // SPARK-12759 Check executor memory to fail fast if memory is insufficient - if (conf.contains("spark.executor.memory")) { - val executorMemory = conf.getSizeAsBytes("spark.executor.memory") + if (conf.contains(config.EXECUTOR_MEMORY)) { + val executorMemory = conf.getSizeAsBytes(config.EXECUTOR_MEMORY.key) if (executorMemory < minSystemMemory) { throw new IllegalArgumentException(s"Executor memory $executorMemory must be at least " + s"$minSystemMemory. Please increase executor memory using the " + - s"--executor-memory option or spark.executor.memory in Spark configuration.") + s"--executor-memory option or ${config.EXECUTOR_MEMORY.key} in Spark configuration.") } } val usableMemory = systemMemory - reservedMemory diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala index 301317a79dfc..b1e311ada459 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala @@ -130,7 +130,7 @@ private[spark] class MetricsSystem private ( private[spark] def buildRegistryName(source: Source): String = { val metricsNamespace = conf.get(METRICS_NAMESPACE).orElse(conf.getOption("spark.app.id")) - val executorId = conf.getOption("spark.executor.id") + val executorId = conf.get(EXECUTOR_ID) val defaultName = MetricRegistry.name(source.sourceName) if (instance == "driver" || instance == "executor") { diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 6bf60dd8e9df..41f032ccf82b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -717,7 +717,7 @@ private[spark] class TaskSetManager( calculatedTasks += 1 if (maxResultSize > 0 && totalResultSize > maxResultSize) { val msg = s"Total size of serialized results of ${calculatedTasks} tasks " + - s"(${Utils.bytesToString(totalResultSize)}) is bigger than spark.driver.maxResultSize " + + s"(${Utils.bytesToString(totalResultSize)}) is bigger than ${config.MAX_RESULT_SIZE.key} " + s"(${Utils.bytesToString(maxResultSize)})" logError(msg) abort(msg) diff --git 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index f73a58ff5d48..adef20d3077d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -25,7 +25,7 @@ import scala.concurrent.Future import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.deploy.{ApplicationDescription, Command} import org.apache.spark.deploy.client.{StandaloneAppClient, StandaloneAppClientListener} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler._ @@ -54,7 +54,7 @@ private[spark] class StandaloneSchedulerBackend( private val registrationBarrier = new Semaphore(0) - private val maxCores = conf.getOption("spark.cores.max").map(_.toInt) + private val maxCores = conf.get(config.CORES_MAX) private val totalExpectedCores = maxCores.getOrElse(0) override def start() { @@ -69,8 +69,8 @@ private[spark] class StandaloneSchedulerBackend( // The endpoint for executors to talk to us val driverUrl = RpcEndpointAddress( - sc.conf.get("spark.driver.host"), - sc.conf.get("spark.driver.port").toInt, + sc.conf.get(config.DRIVER_HOST_ADDRESS), + sc.conf.get(config.DRIVER_PORT), CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString val args = Seq( "--driver-url", driverUrl, @@ -79,11 +79,11 @@ private[spark] class StandaloneSchedulerBackend( "--cores", "{{CORES}}", "--app-id", "{{APP_ID}}", "--worker-url", "{{WORKER_URL}}") - val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions") + val extraJavaOpts = sc.conf.get(config.EXECUTOR_JAVA_OPTIONS) .map(Utils.splitCommandString).getOrElse(Seq.empty) - val classPathEntries = sc.conf.getOption("spark.executor.extraClassPath") + val classPathEntries = sc.conf.get(config.EXECUTOR_CLASS_PATH) .map(_.split(java.io.File.pathSeparator).toSeq).getOrElse(Nil) - val libraryPathEntries = sc.conf.getOption("spark.executor.extraLibraryPath") + val libraryPathEntries = sc.conf.get(config.EXECUTOR_LIBRARY_PATH) .map(_.split(java.io.File.pathSeparator).toSeq).getOrElse(Nil) // When testing, expose the parent class path to the child. This is processed by @@ -102,7 +102,7 @@ private[spark] class StandaloneSchedulerBackend( val command = Command("org.apache.spark.executor.CoarseGrainedExecutorBackend", args, sc.executorEnvs, classPathEntries ++ testingClassPath, libraryPathEntries, javaOpts) val webUrl = sc.ui.map(_.webUrl).getOrElse("") - val coresPerExecutor = conf.getOption("spark.executor.cores").map(_.toInt) + val coresPerExecutor = conf.getOption(config.EXECUTOR_CORES.key).map(_.toInt) // If we're using dynamic allocation, set our initial executor limit to 0 for now. // ExecutorAllocationManager will send the real initial limit to the Master later. 
val initialExecutorLimit = diff --git a/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala index 0de57fbd5600..6ff8bf29b006 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala @@ -24,7 +24,7 @@ import java.nio.ByteBuffer import org.apache.spark.{SparkConf, SparkContext, SparkEnv, TaskState} import org.apache.spark.TaskState.TaskState import org.apache.spark.executor.{Executor, ExecutorBackend} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.scheduler._ @@ -116,7 +116,7 @@ private[spark] class LocalSchedulerBackend( * @param conf Spark configuration. */ def getUserClasspath(conf: SparkConf): Seq[URL] = { - val userClassPathStr = conf.getOption("spark.executor.extraClassPath") + val userClassPathStr = conf.get(config.EXECUTOR_CLASS_PATH) userClassPathStr.map(_.split(File.pathSeparator)).toSeq.flatten.map(new File(_).toURI.toURL) } diff --git a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala index e5cccf39f945..902e48fed391 100644 --- a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala @@ -18,6 +18,7 @@ package org.apache.spark.util import org.apache.spark.SparkConf +import org.apache.spark.internal.config import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcEnv, RpcTimeout} private[spark] object RpcUtils { @@ -26,8 +27,8 @@ private[spark] object RpcUtils { * Retrieve a `RpcEndpointRef` which is located in the driver via its name. */ def makeDriverRef(name: String, conf: SparkConf, rpcEnv: RpcEnv): RpcEndpointRef = { - val driverHost: String = conf.get("spark.driver.host", "localhost") - val driverPort: Int = conf.getInt("spark.driver.port", 7077) + val driverHost: String = conf.get(config.DRIVER_HOST_ADDRESS.key, "localhost") + val driverPort: Int = conf.getInt(config.DRIVER_PORT.key, 7077) Utils.checkHost(driverHost) rpcEnv.setupEndpointRef(RpcAddress(driverHost, driverPort), name) } diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 22f074cf9897..3527fee68939 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2231,7 +2231,7 @@ private[spark] object Utils extends Logging { s"${e.getMessage}: Service$serviceString failed after " + s"$maxRetries retries (on a random free port)! " + s"Consider explicitly setting the appropriate binding address for " + - s"the service$serviceString (for example spark.driver.bindAddress " + + s"the service$serviceString (for example ${DRIVER_BIND_ADDRESS.key} " + s"for SparkDriver) to the correct binding address." 
} else { s"${e.getMessage}: Service$serviceString failed after " + diff --git a/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala b/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala index 2f9ad4c8cc3e..3188e0bd2b70 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala @@ -20,7 +20,7 @@ package org.apache.spark.util.logging import java.io.{File, FileOutputStream, InputStream, IOException} import org.apache.spark.SparkConf -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.util.{IntParam, Utils} /** @@ -115,11 +115,9 @@ private[spark] object FileAppender extends Logging { /** Create the right appender based on Spark configuration */ def apply(inputStream: InputStream, file: File, conf: SparkConf): FileAppender = { - import RollingFileAppender._ - - val rollingStrategy = conf.get(STRATEGY_PROPERTY, STRATEGY_DEFAULT) - val rollingSizeBytes = conf.get(SIZE_PROPERTY, STRATEGY_DEFAULT) - val rollingInterval = conf.get(INTERVAL_PROPERTY, INTERVAL_DEFAULT) + val rollingStrategy = conf.get(config.EXECUTOR_LOGS_ROLLING_STRATEGY) + val rollingSizeBytes = conf.get(config.EXECUTOR_LOGS_ROLLING_MAX_SIZE) + val rollingInterval = conf.get(config.EXECUTOR_LOGS_ROLLING_TIME_INTERVAL) def createTimeBasedAppender(): FileAppender = { val validatedParams: Option[(Long, String)] = rollingInterval match { diff --git a/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala b/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala index 5d8cec8447b5..59439b68792e 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala @@ -24,6 +24,7 @@ import com.google.common.io.Files import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf +import org.apache.spark.internal.config /** * Continuously appends data from input stream into the given file, and rolls @@ -44,10 +45,8 @@ private[spark] class RollingFileAppender( bufferSize: Int = RollingFileAppender.DEFAULT_BUFFER_SIZE ) extends FileAppender(inputStream, activeFile, bufferSize) { - import RollingFileAppender._ - - private val maxRetainedFiles = conf.getInt(RETAINED_FILES_PROPERTY, -1) - private val enableCompression = conf.getBoolean(ENABLE_COMPRESSION, false) + private val maxRetainedFiles = conf.get(config.EXECUTOR_LOGS_ROLLING_MAX_RETAINED_FILES) + private val enableCompression = conf.get(config.EXECUTOR_LOGS_ROLLING_ENABLE_COMPRESSION) /** Stop the appender */ override def stop() { @@ -82,7 +81,7 @@ private[spark] class RollingFileAppender( // Roll the log file and compress if enableCompression is true. private def rotateFile(activeFile: File, rolloverFile: File): Unit = { if (enableCompression) { - val gzFile = new File(rolloverFile.getAbsolutePath + GZIP_LOG_SUFFIX) + val gzFile = new File(rolloverFile.getAbsolutePath + RollingFileAppender.GZIP_LOG_SUFFIX) var gzOutputStream: GZIPOutputStream = null var inputStream: InputStream = null try { @@ -103,7 +102,7 @@ private[spark] class RollingFileAppender( // Check if the rollover file already exists. 
private def rolloverFileExist(file: File): Boolean = { - file.exists || new File(file.getAbsolutePath + GZIP_LOG_SUFFIX).exists + file.exists || new File(file.getAbsolutePath + RollingFileAppender.GZIP_LOG_SUFFIX).exists } /** Move the active log file to a new rollover file */ @@ -164,15 +163,7 @@ private[spark] class RollingFileAppender( * names of configurations that configure rolling file appenders. */ private[spark] object RollingFileAppender { - val STRATEGY_PROPERTY = "spark.executor.logs.rolling.strategy" - val STRATEGY_DEFAULT = "" - val INTERVAL_PROPERTY = "spark.executor.logs.rolling.time.interval" - val INTERVAL_DEFAULT = "daily" - val SIZE_PROPERTY = "spark.executor.logs.rolling.maxSize" - val SIZE_DEFAULT = (1024 * 1024).toString - val RETAINED_FILES_PROPERTY = "spark.executor.logs.rolling.maxRetainedFiles" val DEFAULT_BUFFER_SIZE = 8192 - val ENABLE_COMPRESSION = "spark.executor.logs.rolling.enableCompression" val GZIP_LOG_SUFFIX = ".gz" diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 5c718cb654ce..d0389235cb72 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -155,7 +155,7 @@ class ExecutorAllocationManagerSuite .set("spark.dynamicAllocation.maxExecutors", "15") .set("spark.dynamicAllocation.minExecutors", "3") .set("spark.dynamicAllocation.executorAllocationRatio", divisor.toString) - .set("spark.executor.cores", cores.toString) + .set(config.EXECUTOR_CORES, cores) val sc = new SparkContext(conf) contexts += sc var manager = sc.executorAllocationManager.get diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala index e14a5dcb5ef8..9a6abbdb0a46 100644 --- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala @@ -140,7 +140,7 @@ class SparkConfSuite extends SparkFunSuite with LocalSparkContext with ResetSyst test("creating SparkContext with cpus per tasks bigger than cores per executors") { val conf = new SparkConf(false) - .set("spark.executor.cores", "1") + .set(EXECUTOR_CORES, 1) .set("spark.task.cpus", "2") intercept[SparkException] { sc = new SparkContext(conf) } } diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index a1d2a1283db1..8567dd1f0823 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -243,7 +243,7 @@ class StandaloneDynamicAllocationSuite } test("dynamic allocation with cores per executor") { - sc = new SparkContext(appConf.set("spark.executor.cores", "2")) + sc = new SparkContext(appConf.set(config.EXECUTOR_CORES, 2)) val appId = sc.applicationId eventually(timeout(10.seconds), interval(10.millis)) { val apps = getApplications() @@ -296,7 +296,7 @@ class StandaloneDynamicAllocationSuite test("dynamic allocation with cores per executor AND max cores") { sc = new SparkContext(appConf - .set("spark.executor.cores", "2") + .set(config.EXECUTOR_CORES, 2) .set("spark.cores.max", "8")) val appId = sc.applicationId eventually(timeout(10.seconds), interval(10.millis)) { @@ -526,7 +526,7 @@ class 
StandaloneDynamicAllocationSuite new SparkConf() .setMaster(masterRpcEnv.address.toSparkURL) .setAppName("test") - .set("spark.executor.memory", "256m") + .set(config.EXECUTOR_MEMORY.key, "256m") } /** Make a master to which our application will send executor requests. */ diff --git a/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala b/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala index d56cfc183d92..5ce3453b682f 100644 --- a/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala @@ -248,7 +248,7 @@ class UnifiedMemoryManagerSuite extends MemoryManagerSuite with PrivateMethodTes val mm = UnifiedMemoryManager(conf, numCores = 1) // Try using an executor memory that's too small - val conf2 = conf.clone().set("spark.executor.memory", (reservedMemory / 2).toString) + val conf2 = conf.clone().set(EXECUTOR_MEMORY.key, (reservedMemory / 2).toString) val exception = intercept[IllegalArgumentException] { UnifiedMemoryManager(conf2, numCores = 1) } diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index d264adaef90a..f73ff67837c6 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -655,7 +655,7 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg } test("abort the job if total size of results is too large") { - val conf = new SparkConf().set("spark.driver.maxResultSize", "2m") + val conf = new SparkConf().set(config.MAX_RESULT_SIZE.key, "2m") sc = new SparkContext("local", "test", conf) def genBytes(size: Int): (Int) => Array[Byte] = { (x: Int) => diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala index 3962bdc27d22..19116cf22d2f 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala @@ -31,7 +31,7 @@ import org.scalatest.concurrent.Eventually._ import org.apache.spark._ import org.apache.spark.broadcast.BroadcastManager import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.MEMORY_OFFHEAP_SIZE +import org.apache.spark.internal.config.{DRIVER_PORT, MEMORY_OFFHEAP_SIZE} import org.apache.spark.memory.UnifiedMemoryManager import org.apache.spark.network.BlockTransferService import org.apache.spark.network.netty.NettyBlockTransferService @@ -86,7 +86,7 @@ trait BlockManagerReplicationBehavior extends SparkFunSuite rpcEnv = RpcEnv.create("test", "localhost", 0, conf, securityMgr) conf.set("spark.authenticate", "false") - conf.set("spark.driver.port", rpcEnv.address.port.toString) + conf.set(DRIVER_PORT, rpcEnv.address.port) conf.set("spark.testing", "true") conf.set("spark.memory.fraction", "1") conf.set("spark.memory.storageFraction", "1") diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index cf00c1c3aad3..e866342e4472 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -124,7 +124,7 @@ class BlockManagerSuite 
extends SparkFunSuite with Matchers with BeforeAndAfterE .set("spark.storage.unrollMemoryThreshold", "512") rpcEnv = RpcEnv.create("test", "localhost", 0, conf, securityMgr) - conf.set("spark.driver.port", rpcEnv.address.port.toString) + conf.set(DRIVER_PORT, rpcEnv.address.port) // Mock SparkContext to reduce the memory usage of tests. It's fine since the only reason we // need to create a SparkContext is to initialize LiveListenerBus. diff --git a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala index 52cd5378bc71..242163931f7a 100644 --- a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala @@ -34,7 +34,7 @@ import org.mockito.Mockito.{atLeast, mock, verify} import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.util.logging.{FileAppender, RollingFileAppender, SizeBasedRollingPolicy, TimeBasedRollingPolicy} class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging { @@ -136,7 +136,7 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging { // setup input stream and appender val testOutputStream = new PipedOutputStream() val testInputStream = new PipedInputStream(testOutputStream, 100 * 1000) - val conf = new SparkConf().set(RollingFileAppender.RETAINED_FILES_PROPERTY, "10") + val conf = new SparkConf().set(config.EXECUTOR_LOGS_ROLLING_MAX_RETAINED_FILES, 10) val appender = new RollingFileAppender(testInputStream, testFile, new SizeBasedRollingPolicy(1000, false), conf, 10) @@ -200,13 +200,12 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging { appender.awaitTermination() } - import RollingFileAppender._ - def rollingStrategy(strategy: String): Seq[(String, String)] = - Seq(STRATEGY_PROPERTY -> strategy) - def rollingSize(size: String): Seq[(String, String)] = Seq(SIZE_PROPERTY -> size) + Seq(config.EXECUTOR_LOGS_ROLLING_STRATEGY.key -> strategy) + def rollingSize(size: String): Seq[(String, String)] = + Seq(config.EXECUTOR_LOGS_ROLLING_MAX_SIZE.key -> size) def rollingInterval(interval: String): Seq[(String, String)] = - Seq(INTERVAL_PROPERTY -> interval) + Seq(config.EXECUTOR_LOGS_ROLLING_TIME_INTERVAL.key -> interval) val msInDay = 24 * 60 * 60 * 1000L val msInHour = 60 * 60 * 1000L diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index 8362c14fb289..d52988df58d6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -42,7 +42,7 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) .getOrElse(throw new SparkException("Must specify the driver container image")) // CPU settings - private val driverCpuCores = conf.get("spark.driver.cores", "1") + private val driverCpuCores = conf.get(DRIVER_CORES.key, "1") private val driverLimitCores = conf.get(KUBERNETES_DRIVER_LIMIT_CORES) // Memory settings @@ -85,7 +85,7 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) ("cpu", 
new QuantityBuilder(false).withAmount(limitCores).build()) } - val driverPort = conf.sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT) + val driverPort = conf.sparkConf.getInt(DRIVER_PORT.key, DEFAULT_DRIVER_PORT) val driverBlockManagerPort = conf.sparkConf.getInt( DRIVER_BLOCK_MANAGER_PORT.key, DEFAULT_BLOCKMANAGER_PORT diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index dd73a5e52281..6c3a6b39fa5c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -46,8 +46,8 @@ private[spark] class BasicExecutorFeatureStep( private val executorPodNamePrefix = kubernetesConf.resourceNamePrefix private val driverUrl = RpcEndpointAddress( - kubernetesConf.get("spark.driver.host"), - kubernetesConf.sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), + kubernetesConf.get(DRIVER_HOST_ADDRESS), + kubernetesConf.sparkConf.getInt(DRIVER_PORT.key, DEFAULT_DRIVER_PORT), CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString private val executorMemoryMiB = kubernetesConf.get(EXECUTOR_MEMORY) private val executorMemoryString = kubernetesConf.get( @@ -67,7 +67,7 @@ private[spark] class BasicExecutorFeatureStep( executorMemoryWithOverhead } - private val executorCores = kubernetesConf.sparkConf.getInt("spark.executor.cores", 1) + private val executorCores = kubernetesConf.sparkConf.get(EXECUTOR_CORES) private val executorCoresRequest = if (kubernetesConf.sparkConf.contains(KUBERNETES_EXECUTOR_REQUEST_CORES)) { kubernetesConf.get(KUBERNETES_EXECUTOR_REQUEST_CORES).get diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala index 42305457f4ff..15671179b18b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala @@ -22,7 +22,7 @@ import io.fabric8.kubernetes.api.model.{HasMetadata, ServiceBuilder} import org.apache.spark.deploy.k8s.{KubernetesDriverConf, SparkPod} import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.util.{Clock, SystemClock} private[spark] class DriverServiceFeatureStep( @@ -51,18 +51,17 @@ private[spark] class DriverServiceFeatureStep( } private val driverPort = kubernetesConf.sparkConf.getInt( - "spark.driver.port", DEFAULT_DRIVER_PORT) + config.DRIVER_PORT.key, DEFAULT_DRIVER_PORT) private val driverBlockManagerPort = kubernetesConf.sparkConf.getInt( - org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT.key, DEFAULT_BLOCKMANAGER_PORT) + config.DRIVER_BLOCK_MANAGER_PORT.key, DEFAULT_BLOCKMANAGER_PORT) override def configurePod(pod: SparkPod): SparkPod = pod override def getAdditionalPodSystemProperties(): Map[String, String] = { val driverHostname = s"$resolvedServiceName.${kubernetesConf.namespace}.svc" Map(DRIVER_HOST_KEY -> driverHostname, - "spark.driver.port" -> driverPort.toString, 
- org.apache.spark.internal.config.DRIVER_BLOCK_MANAGER_PORT.key -> - driverBlockManagerPort.toString) + config.DRIVER_PORT.key -> driverPort.toString, + config.DRIVER_BLOCK_MANAGER_PORT.key -> driverBlockManagerPort.toString) } override def getAdditionalKubernetesResources(): Seq[HasMetadata] = { @@ -90,8 +89,8 @@ private[spark] class DriverServiceFeatureStep( } private[spark] object DriverServiceFeatureStep { - val DRIVER_BIND_ADDRESS_KEY = org.apache.spark.internal.config.DRIVER_BIND_ADDRESS.key - val DRIVER_HOST_KEY = org.apache.spark.internal.config.DRIVER_HOST_ADDRESS.key + val DRIVER_BIND_ADDRESS_KEY = config.DRIVER_BIND_ADDRESS.key + val DRIVER_HOST_KEY = config.DRIVER_HOST_ADDRESS.key val DRIVER_SVC_POSTFIX = "-driver-svc" val MAX_SERVICE_NAME_LENGTH = 63 } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index 5ceb9d6d6fcd..27d59dd7f3e5 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -46,7 +46,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { test("Check the pod respects all configurations from the user.") { val sparkConf = new SparkConf() .set(KUBERNETES_DRIVER_POD_NAME, "spark-driver-pod") - .set("spark.driver.cores", "2") + .set(DRIVER_CORES, 2) .set(KUBERNETES_DRIVER_LIMIT_CORES, "4") .set(DRIVER_MEMORY.key, "256M") .set(DRIVER_MEMORY_OVERHEAD, 200L) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index e28c650a571e..36bfb7d41ec3 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.internal.config._ +import org.apache.spark.internal.config import org.apache.spark.internal.config.Python._ import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend @@ -74,8 +74,8 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { .set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, RESOURCE_NAME_PREFIX) .set(CONTAINER_IMAGE, EXECUTOR_IMAGE) .set(KUBERNETES_DRIVER_SUBMIT_CHECK, true) - .set(DRIVER_HOST_ADDRESS, DRIVER_HOSTNAME) - .set("spark.driver.port", DRIVER_PORT.toString) + .set(config.DRIVER_HOST_ADDRESS, DRIVER_HOSTNAME) + .set(config.DRIVER_PORT, DRIVER_PORT) .set(IMAGE_PULL_SECRETS, TEST_IMAGE_PULL_SECRETS) .set("spark.kubernetes.resource.type", "java") } @@ -125,8 +125,8 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { } test("classpath and extra java options get translated into environment variables") { - baseConf.set(EXECUTOR_JAVA_OPTIONS, 
"foo=bar") - baseConf.set(EXECUTOR_CLASS_PATH, "bar=baz") + baseConf.set(config.EXECUTOR_JAVA_OPTIONS, "foo=bar") + baseConf.set(config.EXECUTOR_CLASS_PATH, "bar=baz") val kconf = newExecutorConf(environment = Map("qux" -> "quux")) val step = new BasicExecutorFeatureStep(kconf, new SecurityManager(baseConf)) val executor = step.configurePod(SparkPod.initialPod()) @@ -150,7 +150,7 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { test("auth secret propagation") { val conf = baseConf.clone() - .set(NETWORK_AUTH_ENABLED, true) + .set(config.NETWORK_AUTH_ENABLED, true) .set("spark.master", "k8s://127.0.0.1") val secMgr = new SecurityManager(conf) @@ -168,8 +168,8 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { val secretFile = new File(secretDir, "secret-file.txt") Files.write(secretFile.toPath, "some-secret".getBytes(StandardCharsets.UTF_8)) val conf = baseConf.clone() - .set(NETWORK_AUTH_ENABLED, true) - .set(AUTH_SECRET_FILE, secretFile.getAbsolutePath) + .set(config.NETWORK_AUTH_ENABLED, true) + .set(config.AUTH_SECRET_FILE, secretFile.getAbsolutePath) .set("spark.master", "k8s://127.0.0.1") val secMgr = new SecurityManager(conf) secMgr.initializeAuth() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala index 045278939dff..822f1e32968c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala @@ -39,7 +39,7 @@ class DriverServiceFeatureStepSuite extends SparkFunSuite { test("Headless service has a port for the driver RPC and the block manager.") { val sparkConf = new SparkConf(false) - .set("spark.driver.port", "9000") + .set(DRIVER_PORT, 9000) .set(DRIVER_BLOCK_MANAGER_PORT, 8080) val kconf = KubernetesTestConf.createDriverConf( sparkConf = sparkConf, @@ -61,7 +61,7 @@ class DriverServiceFeatureStepSuite extends SparkFunSuite { test("Hostname and ports are set according to the service name.") { val sparkConf = new SparkConf(false) - .set("spark.driver.port", "9000") + .set(DRIVER_PORT, 9000) .set(DRIVER_BLOCK_MANAGER_PORT, 8080) .set(KUBERNETES_NAMESPACE, "my-namespace") val kconf = KubernetesTestConf.createDriverConf( @@ -87,7 +87,7 @@ class DriverServiceFeatureStepSuite extends SparkFunSuite { s"${kconf.resourceNamePrefix}${DriverServiceFeatureStep.DRIVER_SVC_POSTFIX}", resolvedService) val additionalProps = configurationStep.getAdditionalPodSystemProperties() - assert(additionalProps("spark.driver.port") === DEFAULT_DRIVER_PORT.toString) + assert(additionalProps(DRIVER_PORT.key) === DEFAULT_DRIVER_PORT.toString) assert(additionalProps(DRIVER_BLOCK_MANAGER_PORT.key) === DEFAULT_BLOCKMANAGER_PORT.toString) } diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala index d134847dc74d..dd0b2bad1ecb 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala @@ -129,4 +129,7 @@ package object config { "when launching drivers. 
Default is to accept all offers with sufficient resources.") .stringConf .createWithDefault("") + + private[spark] val EXECUTOR_URI = + ConfigBuilder("spark.executor.uri").stringConf.createOptional } diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala index 68f6921153d8..a4aba3e9c0d0 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala @@ -27,6 +27,7 @@ import org.apache.spark.{SPARK_VERSION => sparkVersion, SparkConf} import org.apache.spark.deploy.Command import org.apache.spark.deploy.mesos.MesosDriverDescription import org.apache.spark.deploy.rest._ +import org.apache.spark.internal.config import org.apache.spark.scheduler.cluster.mesos.MesosClusterScheduler import org.apache.spark.util.Utils @@ -92,12 +93,12 @@ private[mesos] class MesosSubmitRequestServlet( // Optional fields val sparkProperties = request.sparkProperties - val driverExtraJavaOptions = sparkProperties.get("spark.driver.extraJavaOptions") - val driverExtraClassPath = sparkProperties.get("spark.driver.extraClassPath") - val driverExtraLibraryPath = sparkProperties.get("spark.driver.extraLibraryPath") - val superviseDriver = sparkProperties.get("spark.driver.supervise") - val driverMemory = sparkProperties.get("spark.driver.memory") - val driverCores = sparkProperties.get("spark.driver.cores") + val driverExtraJavaOptions = sparkProperties.get(config.DRIVER_JAVA_OPTIONS.key) + val driverExtraClassPath = sparkProperties.get(config.DRIVER_CLASS_PATH.key) + val driverExtraLibraryPath = sparkProperties.get(config.DRIVER_LIBRARY_PATH.key) + val superviseDriver = sparkProperties.get(config.DRIVER_SUPERVISE.key) + val driverMemory = sparkProperties.get(config.DRIVER_MEMORY.key) + val driverCores = sparkProperties.get(config.DRIVER_CORES.key) val name = request.sparkProperties.getOrElse("spark.app.name", mainClass) // Construct driver description diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index cb1bcba651be..021b1ac84805 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -32,6 +32,7 @@ import org.apache.mesos.Protos.TaskStatus.Reason import org.apache.spark.{SecurityManager, SparkConf, SparkException, TaskState} import org.apache.spark.deploy.mesos.{config, MesosDriverDescription} import org.apache.spark.deploy.rest.{CreateSubmissionResponse, KillSubmissionResponse, SubmissionStatusResponse} +import org.apache.spark.internal.config.{CORES_MAX, EXECUTOR_LIBRARY_PATH, EXECUTOR_MEMORY} import org.apache.spark.metrics.MetricsSystem import org.apache.spark.util.Utils @@ -365,8 +366,7 @@ private[spark] class MesosClusterScheduler( } private def getDriverExecutorURI(desc: MesosDriverDescription): Option[String] = { - desc.conf.getOption("spark.executor.uri") - .orElse(desc.command.environment.get("SPARK_EXECUTOR_URI")) + desc.conf.get(config.EXECUTOR_URI).orElse(desc.command.environment.get("SPARK_EXECUTOR_URI")) } private def getDriverFrameworkID(desc: 
MesosDriverDescription): String = { @@ -474,7 +474,7 @@ private[spark] class MesosClusterScheduler( } else if (executorUri.isDefined) { val folderBasename = executorUri.get.split('/').last.split('.').head - val entries = conf.getOption("spark.executor.extraLibraryPath") + val entries = conf.get(EXECUTOR_LIBRARY_PATH) .map(path => Seq(path) ++ desc.command.libraryPathEntries) .getOrElse(desc.command.libraryPathEntries) @@ -528,10 +528,10 @@ private[spark] class MesosClusterScheduler( options ++= Seq("--class", desc.command.mainClass) } - desc.conf.getOption("spark.executor.memory").foreach { v => + desc.conf.getOption(EXECUTOR_MEMORY.key).foreach { v => options ++= Seq("--executor-memory", v) } - desc.conf.getOption("spark.cores.max").foreach { v => + desc.conf.getOption(CORES_MAX.key).foreach { v => options ++= Seq("--total-executor-cores", v) } desc.conf.getOption("spark.submit.pyFiles").foreach { pyFiles => diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index f5866651dc90..d0174516c236 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -33,7 +33,6 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkContext, SparkExceptio import org.apache.spark.deploy.mesos.config._ import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.internal.config -import org.apache.spark.internal.config.EXECUTOR_HEARTBEAT_INTERVAL import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.mesos.MesosExternalShuffleClient @@ -63,9 +62,9 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( // Blacklist a slave after this many failures private val MAX_SLAVE_FAILURES = 2 - private val maxCoresOption = conf.getOption("spark.cores.max").map(_.toInt) + private val maxCoresOption = conf.get(config.CORES_MAX) - private val executorCoresOption = conf.getOption("spark.executor.cores").map(_.toInt) + private val executorCoresOption = conf.getOption(config.EXECUTOR_CORES.key).map(_.toInt) private val minCoresPerExecutor = executorCoresOption.getOrElse(1) @@ -220,18 +219,18 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( def createCommand(offer: Offer, numCores: Int, taskId: String): CommandInfo = { val environment = Environment.newBuilder() - val extraClassPath = conf.getOption("spark.executor.extraClassPath") + val extraClassPath = conf.get(config.EXECUTOR_CLASS_PATH) extraClassPath.foreach { cp => environment.addVariables( Environment.Variable.newBuilder().setName("SPARK_EXECUTOR_CLASSPATH").setValue(cp).build()) } - val extraJavaOpts = conf.getOption("spark.executor.extraJavaOptions").map { + val extraJavaOpts = conf.get(config.EXECUTOR_JAVA_OPTIONS).map { Utils.substituteAppNExecIds(_, appId, taskId) }.getOrElse("") // Set the environment variable through a command prefix // to append to the existing value of the variable - val prefixEnv = conf.getOption("spark.executor.extraLibraryPath").map { p => + val prefixEnv = conf.get(config.EXECUTOR_LIBRARY_PATH).map { p => Utils.libraryPathEnvPrefix(Seq(p)) }.getOrElse("") @@ -261,8 +260,7 @@ 
private[spark] class MesosCoarseGrainedSchedulerBackend( val command = CommandInfo.newBuilder() .setEnvironment(environment) - val uri = conf.getOption("spark.executor.uri") - .orElse(Option(System.getenv("SPARK_EXECUTOR_URI"))) + val uri = conf.get(EXECUTOR_URI).orElse(Option(System.getenv("SPARK_EXECUTOR_URI"))) if (uri.isEmpty) { val executorSparkHome = conf.getOption("spark.mesos.executor.home") @@ -304,8 +302,8 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( "driverURL" } else { RpcEndpointAddress( - conf.get("spark.driver.host"), - conf.get("spark.driver.port").toInt, + conf.get(config.DRIVER_HOST_ADDRESS), + conf.get(config.DRIVER_PORT), CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString } } @@ -633,7 +631,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( externalShufflePort, sc.conf.getTimeAsMs("spark.storage.blockManagerSlaveTimeoutMs", s"${sc.conf.getTimeAsSeconds("spark.network.timeout", "120s")}s"), - sc.conf.get(EXECUTOR_HEARTBEAT_INTERVAL)) + sc.conf.get(config.EXECUTOR_HEARTBEAT_INTERVAL)) slave.shuffleRegistered = true } diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala index 0bb6fe0fa4bd..192f9407a1ba 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala @@ -28,8 +28,9 @@ import org.apache.mesos.SchedulerDriver import org.apache.mesos.protobuf.ByteString import org.apache.spark.{SparkContext, SparkException, TaskState} -import org.apache.spark.deploy.mesos.config +import org.apache.spark.deploy.mesos.config.EXECUTOR_URI import org.apache.spark.executor.MesosExecutorBackend +import org.apache.spark.internal.config import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.util.Utils @@ -107,15 +108,15 @@ private[spark] class MesosFineGrainedSchedulerBackend( throw new SparkException("Executor Spark home `spark.mesos.executor.home` is not set!") } val environment = Environment.newBuilder() - sc.conf.getOption("spark.executor.extraClassPath").foreach { cp => + sc.conf.get(config.EXECUTOR_CLASS_PATH).foreach { cp => environment.addVariables( Environment.Variable.newBuilder().setName("SPARK_EXECUTOR_CLASSPATH").setValue(cp).build()) } - val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions").map { + val extraJavaOpts = sc.conf.get(config.EXECUTOR_JAVA_OPTIONS).map { Utils.substituteAppNExecIds(_, appId, execId) }.getOrElse("") - val prefixEnv = sc.conf.getOption("spark.executor.extraLibraryPath").map { p => + val prefixEnv = sc.conf.get(config.EXECUTOR_LIBRARY_PATH).map { p => Utils.libraryPathEnvPrefix(Seq(p)) }.getOrElse("") @@ -132,8 +133,7 @@ private[spark] class MesosFineGrainedSchedulerBackend( } val command = CommandInfo.newBuilder() .setEnvironment(environment) - val uri = sc.conf.getOption("spark.executor.uri") - .orElse(Option(System.getenv("SPARK_EXECUTOR_URI"))) + val uri = sc.conf.get(EXECUTOR_URI).orElse(Option(System.getenv("SPARK_EXECUTOR_URI"))) val executorBackendName = classOf[MesosExecutorBackend].getName if (uri.isEmpty) { diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index e46c4f970c4a..8dbdac168f70 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -470,8 +470,8 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends rpcEnv = sc.env.rpcEnv val userConf = sc.getConf - val host = userConf.get("spark.driver.host") - val port = userConf.get("spark.driver.port").toInt + val host = userConf.get(DRIVER_HOST_ADDRESS) + val port = userConf.get(DRIVER_PORT) registerAM(host, port, userConf, sc.ui.map(_.webUrl)) val driverRef = rpcEnv.setupEndpointRef( @@ -505,7 +505,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends amCores, true) // The client-mode AM doesn't listen for incoming connections, so report an invalid port. - registerAM(hostname, -1, sparkConf, sparkConf.getOption("spark.driver.appUIAddress")) + registerAM(hostname, -1, sparkConf, sparkConf.get(DRIVER_APP_UI_ADDRESS)) // The driver should be up and listening, so unlike cluster mode, just try to connect to it // with no waiting or retrying. diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index b257d8fdd3b1..7e9cd409daf3 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -224,16 +224,12 @@ package object config { /* Driver configuration. */ - private[spark] val DRIVER_CORES = ConfigBuilder("spark.driver.cores") - .intConf - .createWithDefault(1) + private[spark] val DRIVER_APP_UI_ADDRESS = ConfigBuilder("spark.driver.appUIAddress") + .stringConf + .createOptional /* Executor configuration. */ - private[spark] val EXECUTOR_CORES = ConfigBuilder("spark.executor.cores") - .intConf - .createWithDefault(1) - private[spark] val EXECUTOR_NODE_LABEL_EXPRESSION = ConfigBuilder("spark.yarn.executor.nodeLabelExpression") .doc("Node label expression for executors.") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 9397a1e3de9a..167eef19ed85 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -24,7 +24,7 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.deploy.yarn.{Client, ClientArguments, YarnAppReport} import org.apache.spark.deploy.yarn.config._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.launcher.SparkAppHandle import org.apache.spark.scheduler.TaskSchedulerImpl @@ -42,10 +42,10 @@ private[spark] class YarnClientSchedulerBackend( * This waits until the application is running. 
*/ override def start() { - val driverHost = conf.get("spark.driver.host") - val driverPort = conf.get("spark.driver.port") + val driverHost = conf.get(config.DRIVER_HOST_ADDRESS) + val driverPort = conf.get(config.DRIVER_PORT) val hostport = driverHost + ":" + driverPort - sc.ui.foreach { ui => conf.set("spark.driver.appUIAddress", ui.webUrl) } + sc.ui.foreach { ui => conf.set(DRIVER_APP_UI_ADDRESS, ui.webUrl) } val argsArrayBuf = new ArrayBuffer[String]() argsArrayBuf += ("--arg", hostport) diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ResourceRequestHelperSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ResourceRequestHelperSuite.scala index 8032213602c9..9e3cc6ec01df 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ResourceRequestHelperSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ResourceRequestHelperSuite.scala @@ -24,7 +24,7 @@ import org.scalatest.Matchers import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} import org.apache.spark.deploy.yarn.ResourceRequestTestHelper.ResourceInformation import org.apache.spark.deploy.yarn.config._ -import org.apache.spark.internal.config.{DRIVER_MEMORY, EXECUTOR_MEMORY} +import org.apache.spark.internal.config.{DRIVER_CORES, DRIVER_MEMORY, EXECUTOR_CORES, EXECUTOR_MEMORY} class ResourceRequestHelperSuite extends SparkFunSuite with Matchers { From 36440e64476610ec4037fb14f50cf7f06495e384 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Fri, 4 Jan 2019 15:35:23 -0600 Subject: [PATCH 0227/1072] [SPARK-26306][TEST][BUILD] More memory to de-flake SorterSuite ## What changes were proposed in this pull request? Increase test memory to avoid OOM in TimSort-related tests. ## How was this patch tested? Existing tests. Closes #23425 from srowen/SPARK-26306. Authored-by: Sean Owen Signed-off-by: Sean Owen --- pom.xml | 4 ++-- project/SparkBuild.scala | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- sql/hive/pom.xml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index a433659cd200..d0c525adf355 100644 --- a/pom.xml +++ b/pom.xml @@ -2115,7 +2115,7 @@ **/*Suite.java ${project.build.directory}/surefire-reports - -ea -Xmx3g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} + -ea -Xmx4g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} - -da -Xmx3g -XX:ReservedCodeCacheSize=${CodeCacheSize} + -da -Xmx4g -XX:ReservedCodeCacheSize=${CodeCacheSize} From 89cebf4932ff966cc876ba8a9ecd9d9c034fb071 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Fri, 4 Jan 2019 15:37:09 -0600 Subject: [PATCH 0228/1072] [SPARK-24421][CORE][FOLLOWUP] Use normal direct ByteBuffer allocation if Cleaner can't be set ## What changes were proposed in this pull request? In Java 9+ we can't use sun.misc.Cleaner by default anymore, and this was largely handled in https://github.com/apache/spark/pull/22993 However I think the change there left a significant problem. If a DirectByteBuffer is allocated using the reflective hack in Platform, now, we by default can't set a Cleaner. But I believe this means the memory isn't freed promptly or possibly at all. If a Cleaner can't be set, I think we need to use normal APIs to allocate the direct ByteBuffer. According to comments in the code, the downside is simply that the normal APIs will check and impose limits on how much off-heap memory can be allocated. 
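
A simplified sketch may make the fallback described above concrete: when no Cleaner can be attached, allocation goes through the public `ByteBuffer.allocateDirect` API (which is subject to `-XX:MaxDirectMemorySize`) rather than the reflective `DirectByteBuffer` constructor. This is an illustrative standalone class, not the real `Platform` code; the class name, the `CLEANER_AVAILABLE` flag, and the elided reflective branch are placeholders for exposition only.

```java
import java.nio.ByteBuffer;

// Illustrative sketch only -- not the actual Platform class. It shows the
// decision described in the commit message: if a Cleaner cannot be attached,
// allocate through the public API so the JVM (and -XX:MaxDirectMemorySize)
// manages the buffer's lifetime; otherwise the internal, limit-bypassing path
// would be used.
final class DirectBufferAllocationSketch {

  // Stand-in for the real "was a Cleaner factory method found?" check.
  private static final boolean CLEANER_AVAILABLE = false;

  static ByteBuffer allocateDirectBuffer(int size) {
    if (!CLEANER_AVAILABLE) {
      try {
        // Public API: the buffer's memory is released by GC via its own
        // Cleaner, but allocation is checked against MaxDirectMemorySize.
        return ByteBuffer.allocateDirect(size);
      } catch (OutOfMemoryError oome) {
        throw new OutOfMemoryError("Failed to allocate direct buffer ("
            + oome.getMessage() + "); try increasing -XX:MaxDirectMemorySize=...");
      }
    }
    // Placeholder for the internal path (Unsafe allocation plus a manually
    // attached Cleaner) that the real code uses when a Cleaner is available.
    throw new UnsupportedOperationException("reflective path elided in this sketch");
  }

  public static void main(String[] args) {
    ByteBuffer buf = allocateDirectBuffer(1 << 20); // 1 MiB
    System.out.println("direct=" + buf.isDirect() + ", capacity=" + buf.capacity());
  }
}
```
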
Per the original review on https://github.com/apache/spark/pull/22993 this much seems fine, as either way in this case the user would have to add a JVM setting (increase max, or allow the reflective access). ## How was this patch tested? Existing tests. This resolved an OutOfMemoryError in Java 11 from TimSort tests without increasing test heap size. (See https://github.com/apache/spark/pull/23419#issuecomment-450772125 ) This suggests there is a problem and that this resolves it. Closes #23424 from srowen/SPARK-24421.2. Authored-by: Sean Owen Signed-off-by: Sean Owen --- .../org/apache/spark/unsafe/Platform.java | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java index 076b693f81c8..1adf7abfc8a6 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java @@ -209,22 +209,33 @@ public static long reallocateMemory(long address, long oldSize, long newSize) { } /** - * Uses internal JDK APIs to allocate a DirectByteBuffer while ignoring the JVM's - * MaxDirectMemorySize limit (the default limit is too low and we do not want to require users - * to increase it). + * Allocate a DirectByteBuffer, potentially bypassing the JVM's MaxDirectMemorySize limit. */ public static ByteBuffer allocateDirectBuffer(int size) { try { - long memory = allocateMemory(size); - ByteBuffer buffer = (ByteBuffer) DBB_CONSTRUCTOR.newInstance(memory, size); - if (CLEANER_CREATE_METHOD != null) { + if (CLEANER_CREATE_METHOD == null) { + // Can't set a Cleaner (see comments on field), so need to allocate via normal Java APIs try { - DBB_CLEANER_FIELD.set(buffer, - CLEANER_CREATE_METHOD.invoke(null, buffer, (Runnable) () -> freeMemory(memory))); - } catch (IllegalAccessException | InvocationTargetException e) { - throw new IllegalStateException(e); + return ByteBuffer.allocateDirect(size); + } catch (OutOfMemoryError oome) { + // checkstyle.off: RegexpSinglelineJava + throw new OutOfMemoryError("Failed to allocate direct buffer (" + oome.getMessage() + + "); try increasing -XX:MaxDirectMemorySize=... to, for example, your heap size"); + // checkstyle.on: RegexpSinglelineJava } } + // Otherwise, use internal JDK APIs to allocate a DirectByteBuffer while ignoring the JVM's + // MaxDirectMemorySize limit (the default limit is too low and we do not want to + // require users to increase it). + long memory = allocateMemory(size); + ByteBuffer buffer = (ByteBuffer) DBB_CONSTRUCTOR.newInstance(memory, size); + try { + DBB_CLEANER_FIELD.set(buffer, + CLEANER_CREATE_METHOD.invoke(null, buffer, (Runnable) () -> freeMemory(memory))); + } catch (IllegalAccessException | InvocationTargetException e) { + freeMemory(memory); + throw new IllegalStateException(e); + } return buffer; } catch (Exception e) { throwException(e); From bccb8602d7bc78894689e9b2e5fe685763d32d23 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 4 Jan 2019 18:27:26 -0800 Subject: [PATCH 0229/1072] [SPARK-26537][BUILD] change git-wip-us to gitbox ## What changes were proposed in this pull request? due to apache recently moving from git-wip-us.apache.org to gitbox.apache.org, we need to update the packaging scripts to point to the new repo location. this will also need to be backported to 2.4, 2.3, 2.1, 2.0 and 1.6. ## How was this patch tested? the build system will test this. 
Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23454 from shaneknapp/update-apache-repo. Authored-by: shane knapp Signed-off-by: Dongjoon Hyun --- dev/create-release/release-tag.sh | 2 +- dev/create-release/release-util.sh | 4 ++-- pom.xml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) mode change 100644 => 100755 dev/create-release/release-util.sh diff --git a/dev/create-release/release-tag.sh b/dev/create-release/release-tag.sh index 628bc0504c9c..010082d960a2 100755 --- a/dev/create-release/release-tag.sh +++ b/dev/create-release/release-tag.sh @@ -61,7 +61,7 @@ done init_java init_maven_sbt -ASF_SPARK_REPO="git-wip-us.apache.org/repos/asf/spark.git" +ASF_SPARK_REPO="gitbox.apache.org/repos/asf/spark.git" rm -rf spark git clone "https://$ASF_USERNAME:$ASF_PASSWORD@$ASF_SPARK_REPO" -b $GIT_BRANCH diff --git a/dev/create-release/release-util.sh b/dev/create-release/release-util.sh old mode 100644 new mode 100755 index 7426b0d6ca08..c925de9be52d --- a/dev/create-release/release-util.sh +++ b/dev/create-release/release-util.sh @@ -19,8 +19,8 @@ DRY_RUN=${DRY_RUN:-0} GPG="gpg --no-tty --batch" -ASF_REPO="https://git-wip-us.apache.org/repos/asf/spark.git" -ASF_REPO_WEBUI="https://git-wip-us.apache.org/repos/asf?p=spark.git" +ASF_REPO="https://gitbox.apache.org/repos/asf/spark.git" +ASF_REPO_WEBUI="https://gitbox.apache.org/repos/asf?p=spark.git" function error { echo "$*" diff --git a/pom.xml b/pom.xml index d0c525adf355..40b0e328c035 100644 --- a/pom.xml +++ b/pom.xml @@ -39,7 +39,7 @@ scm:git:git@github.com:apache/spark.git - scm:git:https://git-wip-us.apache.org/repos/asf/spark.git + scm:git:https://gitbox.apache.org/repos/asf/spark.git scm:git:git@github.com:apache/spark.git HEAD From e15a319ccd1125584c09c38ca90b252324df6998 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 4 Jan 2019 19:23:38 -0800 Subject: [PATCH 0230/1072] [SPARK-26536][BUILD][TEST] Upgrade Mockito to 2.23.4 ## What changes were proposed in this pull request? This PR upgrades Mockito from 1.10.19 to 2.23.4. The following changes are required. - Replace `org.mockito.Matchers` with `org.mockito.ArgumentMatchers` - Replace `anyObject` with `any` - Replace `getArgumentAt` with `getArgument` and add type annotation. - Use `isNull` matcher in case of `null` is invoked. ```scala saslHandler.channelInactive(null); - verify(handler).channelInactive(any(TransportClient.class)); + verify(handler).channelInactive(isNull()); ``` - Make and use `doReturn` wrapper to avoid [SI-4775](https://issues.scala-lang.org/browse/SI-4775) ```scala private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) ``` ## How was this patch tested? Pass the Jenkins with the existing tests. Closes #23452 from dongjoon-hyun/SPARK-26536. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/network/sasl/SparkSaslSuite.java | 4 ++-- .../ExternalShuffleBlockHandlerSuite.java | 4 +++- .../shuffle/OneForOneBlockFetcherSuite.java | 8 ++++---- .../unsafe/map/AbstractBytesToBytesMapSuite.java | 4 ++-- .../spark/ExecutorAllocationManagerSuite.scala | 2 +- .../apache/spark/HeartbeatReceiverSuite.scala | 13 ++++++------- .../org/apache/spark/MapOutputTrackerSuite.scala | 2 +- .../StandaloneDynamicAllocationSuite.scala | 2 +- .../deploy/history/ApplicationCacheSuite.scala | 2 +- .../deploy/history/FsHistoryProviderSuite.scala | 4 ++-- .../history/HistoryServerDiskManagerSuite.scala | 6 ++++-- .../spark/deploy/worker/DriverRunnerTest.scala | 2 +- .../apache/spark/deploy/worker/WorkerSuite.scala | 2 +- .../apache/spark/executor/ExecutorSuite.scala | 2 +- .../apache/spark/memory/MemoryManagerSuite.scala | 2 +- .../scala/org/apache/spark/rpc/RpcEnvSuite.scala | 2 +- .../spark/rpc/netty/NettyRpcHandlerSuite.scala | 2 +- .../spark/scheduler/BlacklistTrackerSuite.scala | 2 +- .../apache/spark/scheduler/MapStatusSuite.scala | 3 ++- .../scheduler/OutputCommitCoordinatorSuite.scala | 16 +++++++++------- .../spark/scheduler/TaskContextSuite.scala | 2 +- .../spark/scheduler/TaskResultGetterSuite.scala | 2 +- .../spark/scheduler/TaskSchedulerImplSuite.scala | 10 +++++----- .../spark/scheduler/TaskSetBlacklistSuite.scala | 2 +- .../spark/scheduler/TaskSetManagerSuite.scala | 4 ++-- .../spark/security/CryptoStreamUtilsSuite.scala | 2 +- .../sort/BypassMergeSortShuffleWriterSuite.scala | 2 +- .../sort/IndexShuffleBlockResolverSuite.scala | 2 +- .../shuffle/sort/SortShuffleManagerSuite.scala | 4 +++- .../apache/spark/storage/BlockManagerSuite.scala | 2 +- .../storage/PartiallyUnrolledIteratorSuite.scala | 4 ++-- .../ShuffleBlockFetcherIteratorSuite.scala | 7 +++++-- .../sql/kafka010/KafkaDelegationTokenTest.scala | 4 +++- .../kinesis/KinesisCheckpointerSuite.scala | 2 +- .../streaming/kinesis/KinesisReceiverSuite.scala | 5 ++--- .../launcher/SparkSubmitOptionParserSuite.java | 6 +++++- .../org/apache/spark/ml/PipelineSuite.scala | 2 +- pom.xml | 2 +- .../spark/repl/ExecutorClassLoaderSuite.scala | 2 +- .../spark/deploy/k8s/PodBuilderSuite.scala | 2 +- .../features/KubernetesFeaturesTestUtils.scala | 8 ++++---- .../spark/deploy/k8s/submit/ClientSuite.scala | 4 +++- .../cluster/k8s/ExecutorPodsAllocatorSuite.scala | 4 ++-- .../k8s/ExecutorPodsLifecycleManagerSuite.scala | 4 ++-- .../KubernetesClusterSchedulerBackendSuite.scala | 2 +- .../mesos/MesosClusterSchedulerSuite.scala | 7 ++++--- ...MesosCoarseGrainedSchedulerBackendSuite.scala | 9 ++++----- .../MesosFineGrainedSchedulerBackendSuite.scala | 12 ++++++------ .../spark/scheduler/cluster/mesos/Utils.scala | 11 ++++++----- .../apache/spark/deploy/yarn/ClientSuite.scala | 5 +++-- .../yarn/YarnShuffleServiceMetricsSuite.scala | 8 ++++---- .../continuous/EpochCoordinatorSuite.scala | 4 ++-- .../test/DataStreamReaderWriterSuite.scala | 2 +- .../streaming/ReceivedBlockTrackerSuite.scala | 2 +- .../ExecutorAllocationManagerSuite.scala | 4 ++-- .../streaming/util/WriteAheadLogSuite.scala | 4 ++-- 56 files changed, 131 insertions(+), 111 deletions(-) diff --git a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java index 6f15718bd870..59adf9704cbf 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java +++ 
b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java @@ -347,10 +347,10 @@ public void testRpcHandlerDelegate() throws Exception { verify(handler).getStreamManager(); saslHandler.channelInactive(null); - verify(handler).channelInactive(any(TransportClient.class)); + verify(handler).channelInactive(isNull()); saslHandler.exceptionCaught(null, null); - verify(handler).exceptionCaught(any(Throwable.class), any(TransportClient.class)); + verify(handler).exceptionCaught(isNull(), isNull()); } @Test diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java index 7846b71d5a8b..4cc9a16e1449 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java @@ -27,7 +27,7 @@ import org.mockito.ArgumentCaptor; import static org.junit.Assert.*; -import static org.mockito.Matchers.any; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.*; import org.apache.spark.network.buffer.ManagedBuffer; @@ -79,6 +79,8 @@ public void testRegisterExecutor() { @SuppressWarnings("unchecked") @Test public void testOpenShuffleBlocks() { + when(client.getClientId()).thenReturn("app0"); + RpcResponseCallback callback = mock(RpcResponseCallback.class); ManagedBuffer block0Marker = new NioManagedBuffer(ByteBuffer.wrap(new byte[3])); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java index dc947a619bf0..95460637db89 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java @@ -28,10 +28,10 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyLong; -import static org.mockito.Matchers.eq; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; diff --git a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java index e5fbafc23d95..ecfebf8f8287 100644 --- a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java +++ b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java @@ -50,8 +50,8 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.mockito.Answers.RETURNS_SMART_NULLS; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.Mockito.when; diff --git 
a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index d0389235cb72..38f5e8c9f0ac 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark import scala.collection.mutable -import org.mockito.Matchers.{any, eq => meq} +import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{mock, never, verify, when} import org.scalatest.{BeforeAndAfter, PrivateMethodTester} diff --git a/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala b/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala index de479db5fbc0..a69e589743ef 100644 --- a/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala +++ b/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala @@ -23,8 +23,7 @@ import scala.collection.mutable import scala.concurrent.Future import scala.concurrent.duration._ -import org.mockito.Matchers -import org.mockito.Matchers._ +import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{mock, spy, verify, when} import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} @@ -151,7 +150,7 @@ class HeartbeatReceiverSuite heartbeatReceiverClock.advance(executorTimeout) heartbeatReceiverRef.askSync[Boolean](ExpireDeadHosts) // Only the second executor should be expired as a dead host - verify(scheduler).executorLost(Matchers.eq(executorId2), any()) + verify(scheduler).executorLost(meq(executorId2), any()) val trackedExecutors = getTrackedExecutors assert(trackedExecutors.size === 1) assert(trackedExecutors.contains(executorId1)) @@ -223,10 +222,10 @@ class HeartbeatReceiverSuite assert(!response.reregisterBlockManager) // Additionally verify that the scheduler callback is called with the correct parameters verify(scheduler).executorHeartbeatReceived( - Matchers.eq(executorId), - Matchers.eq(Array(1L -> metrics.accumulators())), - Matchers.eq(blockManagerId), - Matchers.eq(executorUpdates)) + meq(executorId), + meq(Array(1L -> metrics.accumulators())), + meq(blockManagerId), + meq(executorUpdates)) } } diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index 3e1a3d4f7306..c088da8fbf3b 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark import scala.collection.mutable.ArrayBuffer -import org.mockito.Matchers.any +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito._ import org.apache.spark.LocalSparkContext._ diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index 8567dd1f0823..8c3c38dbc7ea 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.deploy import scala.collection.mutable import scala.concurrent.duration._ -import org.mockito.Matchers.any +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{mock, verify, when} import org.scalatest.{BeforeAndAfterAll, PrivateMethodTester} import 
org.scalatest.concurrent.Eventually._ diff --git a/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala index 44f9c566a380..0402d949e904 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala @@ -24,7 +24,7 @@ import scala.collection.mutable import com.codahale.metrics.Counter import org.eclipse.jetty.servlet.ServletContextHandler -import org.mockito.Matchers._ +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 6d2e329094ae..7d6efd95fbab 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -34,7 +34,7 @@ import org.apache.hadoop.hdfs.{DFSInputStream, DistributedFileSystem} import org.apache.hadoop.security.AccessControlException import org.json4s.jackson.JsonMethods._ import org.mockito.ArgumentMatcher -import org.mockito.Matchers.{any, argThat} +import org.mockito.ArgumentMatchers.{any, argThat} import org.mockito.Mockito.{doThrow, mock, spy, verify, when} import org.scalatest.BeforeAndAfter import org.scalatest.Matchers @@ -933,7 +933,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc val mockedFs = spy(provider.fs) doThrow(new AccessControlException("Cannot read accessDenied file")).when(mockedFs).open( argThat(new ArgumentMatcher[Path]() { - override def matches(path: Any): Boolean = { + override def matches(path: Path): Boolean = { path.asInstanceOf[Path].getName.toLowerCase(Locale.ROOT) == "accessdenied" } })) diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala index 341a1e2443df..f78469e13249 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala @@ -20,8 +20,8 @@ package org.apache.spark.deploy.history import java.io.File import org.mockito.AdditionalAnswers -import org.mockito.Matchers.{any, anyBoolean, anyLong, eq => meq} -import org.mockito.Mockito._ +import org.mockito.ArgumentMatchers.{anyBoolean, anyLong, eq => meq} +import org.mockito.Mockito.{doAnswer, spy} import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} @@ -32,6 +32,8 @@ import org.apache.spark.util.kvstore.KVStore class HistoryServerDiskManagerSuite extends SparkFunSuite with BeforeAndAfter { + private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) + private val MAX_USAGE = 3L private var testDir: File = _ diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala index 52956045d598..1deac43897f9 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala @@ -21,7 +21,7 @@ import java.io.File import 
scala.concurrent.duration._ -import org.mockito.Matchers._ +import org.mockito.ArgumentMatchers.{any, anyInt} import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala index e3fe2b696aa1..e5e5b5e428c4 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala @@ -22,7 +22,7 @@ import java.util.function.Supplier import org.mockito.{Mock, MockitoAnnotations} import org.mockito.Answers.RETURNS_SMART_NULLS -import org.mockito.Matchers._ +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 32a94e60484e..a5fe2026c0f7 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -30,7 +30,7 @@ import scala.concurrent.duration._ import scala.language.postfixOps import org.mockito.ArgumentCaptor -import org.mockito.Matchers.{any, eq => meq} +import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{inOrder, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala b/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala index 85eeb5055ae0..8b35f1dfddb0 100644 --- a/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala @@ -23,7 +23,7 @@ import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration -import org.mockito.Matchers.{any, anyLong} +import org.mockito.ArgumentMatchers.{any, anyLong} import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala index 5cb2b561d6bc..558b7fa49832 100644 --- a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala @@ -29,7 +29,7 @@ import scala.concurrent.duration._ import scala.language.postfixOps import com.google.common.io.Files -import org.mockito.Matchers.any +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{mock, never, verify, when} import org.scalatest.BeforeAndAfterAll import org.scalatest.concurrent.Eventually._ diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala index a71d8726e706..4bc001fe8f7c 100644 --- a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala @@ -21,7 +21,7 @@ import java.net.InetSocketAddress import java.nio.ByteBuffer import io.netty.channel.Channel -import org.mockito.Matchers._ +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito._ import org.apache.spark.SparkFunSuite 
diff --git a/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala index 96c8404327e2..aea4c5f96bbe 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.scheduler -import org.mockito.Matchers.any +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{never, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala index 2155a0f2b6c2..f41ffb7f2c0b 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala @@ -21,7 +21,7 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, import scala.util.Random -import org.mockito.Mockito._ +import org.mockito.Mockito.mock import org.roaringbitmap.RoaringBitmap import org.apache.spark.{SparkConf, SparkContext, SparkEnv, SparkFunSuite} @@ -31,6 +31,7 @@ import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.storage.BlockManagerId class MapStatusSuite extends SparkFunSuite { + private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) test("compressSize") { assert(MapStatus.compressSize(0L) === 0) diff --git a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala index 158c9eb75f2b..a560013dba96 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala @@ -26,8 +26,8 @@ import scala.language.postfixOps import org.apache.hadoop.mapred._ import org.apache.hadoop.mapreduce.TaskType -import org.mockito.Matchers -import org.mockito.Mockito._ +import org.mockito.ArgumentMatchers.{any, eq => meq} +import org.mockito.Mockito.{doAnswer, spy, times, verify} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter @@ -71,6 +71,8 @@ import org.apache.spark.util.{ThreadUtils, Utils} */ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { + private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) + var outputCommitCoordinator: OutputCommitCoordinator = null var tempDir: File = null var sc: SparkContext = null @@ -103,7 +105,7 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { invoke.callRealMethod() mockTaskScheduler.backend.reviveOffers() } - }).when(mockTaskScheduler).submitTasks(Matchers.any()) + }).when(mockTaskScheduler).submitTasks(any()) doAnswer(new Answer[TaskSetManager]() { override def answer(invoke: InvocationOnMock): TaskSetManager = { @@ -123,7 +125,7 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { } } } - }).when(mockTaskScheduler).createTaskSetManager(Matchers.any(), Matchers.any()) + }).when(mockTaskScheduler).createTaskSetManager(any(), any()) sc.taskScheduler = mockTaskScheduler val dagSchedulerWithMockTaskScheduler = new DAGScheduler(sc, mockTaskScheduler) @@ -154,7 +156,7 @@ class 
OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { test("Job should not complete if all commits are denied") { // Create a mock OutputCommitCoordinator that denies all attempts to commit doReturn(false).when(outputCommitCoordinator).handleAskPermissionToCommit( - Matchers.any(), Matchers.any(), Matchers.any(), Matchers.any()) + any(), any(), any(), any()) val rdd: RDD[Int] = sc.parallelize(Seq(1), 1) def resultHandler(x: Int, y: Unit): Unit = {} val futureAction: SimpleFutureAction[Unit] = sc.submitJob[Int, Unit, Unit](rdd, @@ -268,8 +270,8 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { assert(retriedStage.size === 1) assert(sc.dagScheduler.outputCommitCoordinator.isEmpty) verify(sc.env.outputCommitCoordinator, times(2)) - .stageStart(Matchers.eq(retriedStage.head), Matchers.any()) - verify(sc.env.outputCommitCoordinator).stageEnd(Matchers.eq(retriedStage.head)) + .stageStart(meq(retriedStage.head), any()) + verify(sc.env.outputCommitCoordinator).stageEnd(meq(retriedStage.head)) } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala index aa9c36c0aaac..3bfc97b80184 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import java.util.Properties -import org.mockito.Matchers.any +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito._ import org.scalatest.BeforeAndAfter diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala index efb8b15cf6b4..ea1439cfebca 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala @@ -28,7 +28,7 @@ import scala.util.control.NonFatal import com.google.common.util.concurrent.MoreExecutors import org.mockito.ArgumentCaptor -import org.mockito.Matchers.{any, anyLong} +import org.mockito.ArgumentMatchers.{any, anyLong} import org.mockito.Mockito.{spy, times, verify} import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.Eventually._ diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala index 29172b4664e3..9c555a923d62 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala @@ -22,7 +22,7 @@ import java.nio.ByteBuffer import scala.collection.mutable.HashMap import scala.concurrent.duration._ -import org.mockito.Matchers.{anyInt, anyObject, anyString, eq => meq} +import org.mockito.ArgumentMatchers.{any, anyInt, anyString, eq => meq} import org.mockito.Mockito.{atLeast, atMost, never, spy, times, verify, when} import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually @@ -430,7 +430,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B verify(blacklist, never).updateBlacklistForSuccessfulTaskSet( stageId = meq(2), stageAttemptId = anyInt(), - failuresByExec = anyObject()) + failuresByExec = any()) } test("scheduled tasks obey node and executor blacklists") { @@ -504,7 +504,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite 
with LocalSparkContext with B WorkerOffer("executor3", "host1", 2) )).flatten.size === 0) assert(tsm.isZombie) - verify(tsm).abort(anyString(), anyObject()) + verify(tsm).abort(anyString(), any()) } test("SPARK-22148 abort timer should kick in when task is completely blacklisted & no new " + @@ -1184,7 +1184,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(finalTsm.isZombie) // no taskset has completed all of its tasks, so no updates to the blacklist tracker yet - verify(blacklist, never).updateBlacklistForSuccessfulTaskSet(anyInt(), anyInt(), anyObject()) + verify(blacklist, never).updateBlacklistForSuccessfulTaskSet(anyInt(), anyInt(), any()) // finally, lets complete all the tasks. We simulate failures in attempt 1, but everything // else succeeds, to make sure we get the right updates to the blacklist in all cases. @@ -1202,7 +1202,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // we update the blacklist for the stage attempts with all successful tasks. Even though // some tasksets had failures, we still consider them all successful from a blacklisting // perspective, as the failures weren't from a problem w/ the tasks themselves. - verify(blacklist).updateBlacklistForSuccessfulTaskSet(meq(0), meq(stageAttempt), anyObject()) + verify(blacklist).updateBlacklistForSuccessfulTaskSet(meq(0), meq(stageAttempt), any()) } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala index 6e2709dbe1e8..b3bc76687ce1 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.scheduler -import org.mockito.Matchers.isA +import org.mockito.ArgumentMatchers.isA import org.mockito.Mockito.{never, verify} import org.scalatest.BeforeAndAfterEach import org.scalatest.mockito.MockitoSugar diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index f73ff67837c6..f9dfd2c456c5 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -22,7 +22,7 @@ import java.util.{Properties, Random} import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import org.mockito.Matchers.{any, anyInt, anyString} +import org.mockito.ArgumentMatchers.{any, anyInt, anyString} import org.mockito.Mockito.{mock, never, spy, times, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer @@ -1319,7 +1319,7 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg when(taskSetManagerSpy.addPendingTask(anyInt())).thenAnswer( new Answer[Unit] { override def answer(invocationOnMock: InvocationOnMock): Unit = { - val task = invocationOnMock.getArgumentAt(0, classOf[Int]) + val task: Int = invocationOnMock.getArgument(0) assert(taskSetManager.taskSetBlacklistHelperOpt.get. 
isExecutorBlacklistedForTask(exec, task)) } diff --git a/core/src/test/scala/org/apache/spark/security/CryptoStreamUtilsSuite.scala b/core/src/test/scala/org/apache/spark/security/CryptoStreamUtilsSuite.scala index 0d3611c80b8d..e5d1bf4fde9e 100644 --- a/core/src/test/scala/org/apache/spark/security/CryptoStreamUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/security/CryptoStreamUtilsSuite.scala @@ -24,7 +24,7 @@ import java.nio.file.Files import java.util.{Arrays, Random, UUID} import com.google.common.io.ByteStreams -import org.mockito.Matchers.any +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito._ import org.apache.spark._ diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala index 4467c3241a94..7f956c26d0ff 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala @@ -25,7 +25,7 @@ import scala.collection.mutable.ArrayBuffer import org.mockito.{Mock, MockitoAnnotations} import org.mockito.Answers.RETURNS_SMART_NULLS -import org.mockito.Matchers._ +import org.mockito.ArgumentMatchers.{any, anyInt} import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala index 4ce379b76b55..0154d0b6ef6f 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala @@ -21,7 +21,7 @@ import java.io.{DataInputStream, File, FileInputStream, FileOutputStream} import org.mockito.{Mock, MockitoAnnotations} import org.mockito.Answers.RETURNS_SMART_NULLS -import org.mockito.Matchers._ +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleManagerSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleManagerSuite.scala index f29dac965c80..e5f3aab6a6a1 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleManagerSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.shuffle.sort -import org.mockito.Mockito._ +import org.mockito.Mockito.{mock, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer import org.scalatest.Matchers @@ -31,6 +31,8 @@ import org.apache.spark.serializer.{JavaSerializer, KryoSerializer, Serializer} */ class SortShuffleManagerSuite extends SparkFunSuite with Matchers { + private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) + import SortShuffleManager.canUseSerializedShuffle private class RuntimeExceptionAnswer extends Answer[Object] { diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index e866342e4472..a7bb2a03360a 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -27,7 +27,7 @@ import scala.language.{implicitConversions, postfixOps} import scala.reflect.ClassTag import org.apache.commons.lang3.RandomUtils -import org.mockito.{Matchers => mc} +import org.mockito.{ArgumentMatchers => mc} import org.mockito.Mockito.{mock, times, verify, when} import org.scalatest._ import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} diff --git a/core/src/test/scala/org/apache/spark/storage/PartiallyUnrolledIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/PartiallyUnrolledIteratorSuite.scala index cbc903f17ad7..56860b2e5570 100644 --- a/core/src/test/scala/org/apache/spark/storage/PartiallyUnrolledIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/PartiallyUnrolledIteratorSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import org.mockito.Matchers +import org.mockito.ArgumentMatchers.{eq => meq} import org.mockito.Mockito._ import org.scalatest.mockito.MockitoSugar @@ -45,7 +45,7 @@ class PartiallyUnrolledIteratorSuite extends SparkFunSuite with MockitoSugar { joinIterator.hasNext joinIterator.hasNext verify(memoryStore, times(1)) - .releaseUnrollMemoryForThisTask(Matchers.eq(ON_HEAP), Matchers.eq(unrollSize.toLong)) + .releaseUnrollMemoryForThisTask(meq(ON_HEAP), meq(unrollSize.toLong)) // Secondly, iterate over rest iterator (unrollSize until unrollSize + restSize).foreach { value => diff --git a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala index 01ee9ef0825f..6b83243fe496 100644 --- a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala @@ -24,8 +24,8 @@ import java.util.concurrent.Semaphore import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future -import org.mockito.Matchers.{any, eq => meq} -import org.mockito.Mockito._ +import org.mockito.ArgumentMatchers.{any, eq => meq} +import org.mockito.Mockito.{mock, times, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer import org.scalatest.PrivateMethodTester @@ -40,6 +40,9 @@ import org.apache.spark.util.Utils class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodTester { + + private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) + // Some of the tests are quite tricky because we are testing the cleanup behavior // in the presence of faults. 
diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenTest.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenTest.scala index 1899c65c721b..31247ab21908 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenTest.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenTest.scala @@ -22,7 +22,7 @@ import javax.security.auth.login.{AppConfigurationEntry, Configuration} import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.hadoop.security.token.Token -import org.mockito.Mockito.{doReturn, mock} +import org.mockito.Mockito.mock import org.scalatest.BeforeAndAfterEach import org.apache.spark.{SparkConf, SparkEnv, SparkFunSuite} @@ -35,6 +35,8 @@ import org.apache.spark.deploy.security.KafkaTokenUtil.KafkaDelegationTokenIdent trait KafkaDelegationTokenTest extends BeforeAndAfterEach { self: SparkFunSuite => + private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) + protected val tokenId = "tokenId" + ju.UUID.randomUUID().toString protected val tokenPassword = "tokenPassword" + ju.UUID.randomUUID().toString diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointerSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointerSuite.scala index e26f4477d1d7..bd31b7dc49a6 100644 --- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointerSuite.scala +++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointerSuite.scala @@ -24,7 +24,7 @@ import scala.concurrent.duration._ import scala.language.postfixOps import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer -import org.mockito.Matchers._ +import org.mockito.ArgumentMatchers._ import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index 2fadda271ea2..7531a9cc400d 100644 --- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -24,9 +24,8 @@ import com.amazonaws.services.kinesis.clientlibrary.exceptions._ import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer import com.amazonaws.services.kinesis.clientlibrary.lib.worker.ShutdownReason import com.amazonaws.services.kinesis.model.Record -import org.mockito.Matchers._ -import org.mockito.Matchers.{eq => meq} -import org.mockito.Mockito._ +import org.mockito.ArgumentMatchers.{anyListOf, anyString, eq => meq} +import org.mockito.Mockito.{never, times, verify, when} import org.scalatest.{BeforeAndAfter, Matchers} import org.scalatest.mockito.MockitoSugar diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitOptionParserSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitOptionParserSuite.java index 9ff7aceb581f..4e26cf6c109c 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitOptionParserSuite.java +++ 
b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitOptionParserSuite.java @@ -23,6 +23,7 @@ import org.junit.Before; import org.junit.Test; +import static org.mockito.ArgumentMatchers.isNull; import static org.mockito.Mockito.*; public class SparkSubmitOptionParserSuite extends BaseSuite { @@ -48,14 +49,17 @@ public void testAllOptions() { } } + int nullCount = 0; for (String[] switchNames : parser.switches) { int switchCount = 0; for (String name : switchNames) { parser.parse(Arrays.asList(name)); count++; + nullCount++; switchCount++; verify(parser, times(switchCount)).handle(eq(switchNames[0]), same(null)); - verify(parser, times(count)).handle(anyString(), any(String.class)); + verify(parser, times(nullCount)).handle(anyString(), isNull()); + verify(parser, times(count - nullCount)).handle(anyString(), any(String.class)); verify(parser, times(count)).handleExtraArgs(eq(Collections.emptyList())); } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala index 7848eae931a0..1183cb061761 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path -import org.mockito.Matchers.{any, eq => meq} +import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.when import org.scalatest.mockito.MockitoSugar.mock diff --git a/pom.xml b/pom.xml index 40b0e328c035..245344a82693 100644 --- a/pom.xml +++ b/pom.xml @@ -764,7 +764,7 @@ org.mockito mockito-core - 1.10.19 + 2.23.4 test diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index ac528ecb829b..e9ed01ff2233 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -30,7 +30,7 @@ import scala.io.Source import scala.language.implicitConversions import com.google.common.io.Files -import org.mockito.Matchers.anyString +import org.mockito.ArgumentMatchers.anyString import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala index 7dde0c137716..707c823d69cf 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala @@ -21,7 +21,7 @@ import java.io.File import io.fabric8.kubernetes.api.model.{Config => _, _} import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.{MixedOperation, PodResource} -import org.mockito.Matchers.any +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{mock, never, verify, when} import scala.collection.JavaConverters._ diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala index 076b681be239..95de7d905954 100644 --- 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KubernetesFeaturesTestUtils.scala @@ -20,8 +20,8 @@ import scala.collection.JavaConverters._ import scala.reflect.ClassTag import io.fabric8.kubernetes.api.model.{Container, HasMetadata, PodBuilder, SecretBuilder} -import org.mockito.Matchers -import org.mockito.Mockito._ +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito.{mock, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer @@ -37,10 +37,10 @@ object KubernetesFeaturesTestUtils { when(mockStep.getAdditionalPodSystemProperties()) .thenReturn(Map(stepType -> stepType)) - when(mockStep.configurePod(Matchers.any(classOf[SparkPod]))) + when(mockStep.configurePod(any(classOf[SparkPod]))) .thenAnswer(new Answer[SparkPod]() { override def answer(invocation: InvocationOnMock): SparkPod = { - val originalPod = invocation.getArgumentAt(0, classOf[SparkPod]) + val originalPod: SparkPod = invocation.getArgument(0) val configuredPod = new PodBuilder(originalPod.pod) .editOrNewMetadata() .addToLabels(stepType, stepType) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala index 1bb926cbca23..aa421be6e841 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala @@ -20,7 +20,7 @@ import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.{KubernetesClient, Watch} import io.fabric8.kubernetes.client.dsl.PodResource import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} -import org.mockito.Mockito.{doReturn, verify, when} +import org.mockito.Mockito.{verify, when} import org.scalatest.BeforeAndAfter import org.scalatest.mockito.MockitoSugar._ @@ -31,6 +31,8 @@ import org.apache.spark.deploy.k8s.Fabric8Aliases._ class ClientSuite extends SparkFunSuite with BeforeAndAfter { + private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) + private val DRIVER_POD_UID = "pod-id" private val DRIVER_POD_API_VERSION = "v1" private val DRIVER_POD_KIND = "pod" diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index 278a3821a6f3..55d9adc212f9 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -20,7 +20,7 @@ import io.fabric8.kubernetes.api.model.{DoneablePod, Pod, PodBuilder} import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.PodResource import org.mockito.{ArgumentMatcher, Matchers, Mock, MockitoAnnotations} -import org.mockito.Matchers.{any, eq => meq} +import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{never, times, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer @@ -156,7 +156,7 @@ class 
ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { private def executorPodAnswer(): Answer[SparkPod] = { new Answer[SparkPod] { override def answer(invocation: InvocationOnMock): SparkPod = { - val k8sConf = invocation.getArgumentAt(0, classOf[KubernetesExecutorConf]) + val k8sConf: KubernetesExecutorConf = invocation.getArgument(0) executorPodWithId(k8sConf.executorId.toInt) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManagerSuite.scala index 7411f8f9d69e..b20ed4799e32 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManagerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManagerSuite.scala @@ -21,7 +21,7 @@ import io.fabric8.kubernetes.api.model.{DoneablePod, Pod} import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.PodResource import org.mockito.{Mock, MockitoAnnotations} -import org.mockito.Matchers.any +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{mock, never, times, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer @@ -128,7 +128,7 @@ class ExecutorPodsLifecycleManagerSuite extends SparkFunSuite with BeforeAndAfte private def namedPodsAnswer(): Answer[PodResource[Pod, DoneablePod]] = { new Answer[PodResource[Pod, DoneablePod]] { override def answer(invocation: InvocationOnMock): PodResource[Pod, DoneablePod] = { - val podName = invocation.getArgumentAt(0, classOf[String]) + val podName: String = invocation.getArgument(0) namedExecutorPods.getOrElseUpdate( podName, mock(classOf[PodResource[Pod, DoneablePod]])) } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala index 6e182bed459f..8ed934d91dd7 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler.cluster.k8s import io.fabric8.kubernetes.client.KubernetesClient import org.jmock.lib.concurrent.DeterministicScheduler import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} -import org.mockito.Matchers.{eq => mockitoEq} +import org.mockito.ArgumentMatchers.{eq => mockitoEq} import org.mockito.Mockito.{never, verify, when} import org.scalatest.BeforeAndAfter diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala index 082d4bcfdf83..7adac1964e01 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala @@ -24,7 +24,8 @@ import scala.collection.JavaConverters._ import 
org.apache.mesos.Protos.{TaskState => MesosTaskState, _} import org.apache.mesos.Protos.Value.{Scalar, Type} import org.apache.mesos.SchedulerDriver -import org.mockito.{ArgumentCaptor, Matchers} +import org.mockito.ArgumentCaptor +import org.mockito.ArgumentMatchers.{eq => meq} import org.mockito.Mockito._ import org.scalatest.mockito.MockitoSugar @@ -133,7 +134,7 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi when( driver.launchTasks( - Matchers.eq(Collections.singleton(offer.getId)), + meq(Collections.singleton(offer.getId)), capture.capture()) ).thenReturn(Status.valueOf(1)) @@ -156,7 +157,7 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi assert(mem.exists(_.getRole() == "*")) verify(driver, times(1)).launchTasks( - Matchers.eq(Collections.singleton(offer.getId)), + meq(Collections.singleton(offer.getId)), capture.capture() ) } diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala index da33d85d8fb2..0cfaa0a0c9a6 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala @@ -24,9 +24,8 @@ import scala.concurrent.duration._ import org.apache.mesos.{Protos, Scheduler, SchedulerDriver} import org.apache.mesos.Protos._ -import org.mockito.Matchers -import org.mockito.Matchers._ -import org.mockito.Mockito._ +import org.mockito.ArgumentMatchers.{any, anyInt, anyLong, anyString, eq => meq} +import org.mockito.Mockito.{times, verify, when} import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.ScalaFutures import org.scalatest.mockito.MockitoSugar @@ -697,9 +696,9 @@ class MesosCoarseGrainedSchedulerBackendSuite extends SparkFunSuite offerId: OfferID, filter: Boolean = false): Unit = { if (filter) { - verify(driver, times(1)).declineOffer(Matchers.eq(offerId), anyObject[Filters]) + verify(driver, times(1)).declineOffer(meq(offerId), any[Filters]()) } else { - verify(driver, times(1)).declineOffer(Matchers.eq(offerId)) + verify(driver, times(1)).declineOffer(meq(offerId)) } } diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala index 1ead4b1ed7c7..c9b7e6c439c4 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala @@ -30,8 +30,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.mesos.{Protos, Scheduler, SchedulerDriver} import org.apache.mesos.Protos._ import org.apache.mesos.Protos.Value.Scalar -import org.mockito.{ArgumentCaptor, Matchers} -import org.mockito.Matchers._ +import org.mockito.ArgumentCaptor +import org.mockito.ArgumentMatchers.{any, anyLong, eq => meq} import org.mockito.Mockito._ import org.scalatest.mockito.MockitoSugar @@ -264,7 +264,7 @@ class MesosFineGrainedSchedulerBackendSuite val capture = 
ArgumentCaptor.forClass(classOf[Collection[TaskInfo]]) when( driver.launchTasks( - Matchers.eq(Collections.singleton(mesosOffers.get(0).getId)), + meq(Collections.singleton(mesosOffers.get(0).getId)), capture.capture(), any(classOf[Filters]) ) @@ -275,7 +275,7 @@ class MesosFineGrainedSchedulerBackendSuite backend.resourceOffers(driver, mesosOffers) verify(driver, times(1)).launchTasks( - Matchers.eq(Collections.singleton(mesosOffers.get(0).getId)), + meq(Collections.singleton(mesosOffers.get(0).getId)), capture.capture(), any(classOf[Filters]) ) @@ -373,7 +373,7 @@ class MesosFineGrainedSchedulerBackendSuite val capture = ArgumentCaptor.forClass(classOf[Collection[TaskInfo]]) when( driver.launchTasks( - Matchers.eq(Collections.singleton(mesosOffers.get(0).getId)), + meq(Collections.singleton(mesosOffers.get(0).getId)), capture.capture(), any(classOf[Filters]) ) @@ -382,7 +382,7 @@ class MesosFineGrainedSchedulerBackendSuite backend.resourceOffers(driver, mesosOffers) verify(driver, times(1)).launchTasks( - Matchers.eq(Collections.singleton(mesosOffers.get(0).getId)), + meq(Collections.singleton(mesosOffers.get(0).getId)), capture.capture(), any(classOf[Filters]) ) diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/Utils.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/Utils.scala index c9f47471cd75..65e595e3cf2b 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/Utils.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/Utils.scala @@ -25,8 +25,9 @@ import org.apache.mesos.Protos._ import org.apache.mesos.Protos.Value.{Range => MesosRange, Ranges, Scalar} import org.apache.mesos.SchedulerDriver import org.apache.mesos.protobuf.ByteString -import org.mockito.{ArgumentCaptor, Matchers} -import org.mockito.Mockito._ +import org.mockito.ArgumentCaptor +import org.mockito.ArgumentMatchers.{any, eq => meq} +import org.mockito.Mockito.{times, verify} import org.apache.spark.deploy.mesos.config.MesosSecretConfig @@ -84,15 +85,15 @@ object Utils { def verifyTaskLaunched(driver: SchedulerDriver, offerId: String): List[TaskInfo] = { val captor = ArgumentCaptor.forClass(classOf[java.util.Collection[TaskInfo]]) verify(driver, times(1)).launchTasks( - Matchers.eq(Collections.singleton(createOfferId(offerId))), + meq(Collections.singleton(createOfferId(offerId))), captor.capture()) captor.getValue.asScala.toList } def verifyTaskNotLaunched(driver: SchedulerDriver, offerId: String): Unit = { verify(driver, times(0)).launchTasks( - Matchers.eq(Collections.singleton(createOfferId(offerId))), - Matchers.any(classOf[java.util.Collection[TaskInfo]])) + meq(Collections.singleton(createOfferId(offerId))), + any(classOf[java.util.Collection[TaskInfo]])) } def createOfferId(offerId: String): OfferID = { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index a6f57fcdb246..9acd99546c03 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -34,8 +34,8 @@ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.client.api.YarnClientApplication import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.util.Records -import org.mockito.Matchers.{eq => 
meq, _} -import org.mockito.Mockito._ +import org.mockito.ArgumentMatchers.{any, anyBoolean, anyShort, eq => meq} +import org.mockito.Mockito.{spy, verify} import org.scalatest.Matchers import org.apache.spark.{SparkConf, SparkFunSuite, TestUtils} @@ -43,6 +43,7 @@ import org.apache.spark.deploy.yarn.config._ import org.apache.spark.util.{SparkConfWithEnv, Utils} class ClientSuite extends SparkFunSuite with Matchers { + private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) import Client._ diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala index 952fd0b70bb7..f538cbc5b765 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.network.yarn import scala.collection.JavaConverters._ import org.apache.hadoop.metrics2.MetricsRecordBuilder -import org.mockito.Matchers._ +import org.mockito.ArgumentMatchers.{any, anyDouble, anyInt, anyLong} import org.mockito.Mockito.{mock, times, verify, when} import org.scalatest.Matchers @@ -56,8 +56,8 @@ class YarnShuffleServiceMetricsSuite extends SparkFunSuite with Matchers { YarnShuffleServiceMetrics.collectMetric(builder, testname, metrics.getMetrics.get(testname)) - verify(builder).addCounter(anyObject(), anyLong()) - verify(builder, times(4)).addGauge(anyObject(), anyDouble()) + verify(builder).addCounter(any(), anyLong()) + verify(builder, times(4)).addGauge(any(), anyDouble()) } } @@ -69,6 +69,6 @@ class YarnShuffleServiceMetricsSuite extends SparkFunSuite with Matchers { metrics.getMetrics.get("registeredExecutorsSize")) // only one - verify(builder).addGauge(anyObject(), anyInt()) + verify(builder).addGauge(any(), anyInt()) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala index 3c973d8ebc70..e644c16ddfea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.streaming.continuous +import org.mockito.ArgumentMatchers.{any, eq => eqTo} import org.mockito.InOrder -import org.mockito.Matchers.{any, eq => eqTo} -import org.mockito.Mockito._ +import org.mockito.Mockito.{inOrder, never, verify} import org.scalatest.BeforeAndAfterEach import org.scalatest.mockito.MockitoSugar diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index 8212fb912ec5..4d3a54a048e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -24,7 +24,7 @@ import java.util.concurrent.TimeUnit import scala.concurrent.duration._ import org.apache.hadoop.fs.Path -import org.mockito.Matchers.{any, eq => meq} +import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito._ import org.scalatest.BeforeAndAfter 
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala index fd7e00b1de25..bdaef9494915 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala @@ -26,7 +26,7 @@ import scala.language.{implicitConversions, postfixOps} import scala.util.Random import org.apache.hadoop.conf.Configuration -import org.mockito.Matchers.any +import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{doThrow, reset, spy} import org.scalatest.{BeforeAndAfter, Matchers} import org.scalatest.concurrent.Eventually._ diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala index 8d81b582e4d3..7ec02c4782e4 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala @@ -17,8 +17,8 @@ package org.apache.spark.streaming.scheduler -import org.mockito.Matchers.{eq => meq} -import org.mockito.Mockito._ +import org.mockito.ArgumentMatchers.{eq => meq} +import org.mockito.Mockito.{never, reset, times, verify, when} import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, PrivateMethodTester} import org.scalatest.concurrent.Eventually.{eventually, timeout} import org.scalatest.mockito.MockitoSugar diff --git a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala index 4a2549fc0a96..c20380d8490d 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala @@ -31,8 +31,8 @@ import scala.language.{implicitConversions, postfixOps} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.mockito.ArgumentCaptor -import org.mockito.Matchers.{eq => meq, _} -import org.mockito.Mockito._ +import org.mockito.ArgumentMatchers.{any, anyLong, eq => meq} +import org.mockito.Mockito.{times, verify, when} import org.scalatest.{BeforeAndAfter, BeforeAndAfterEach, PrivateMethodTester} import org.scalatest.concurrent.Eventually import org.scalatest.concurrent.Eventually._ From 5969b8a2edb913fe2a8e0d928010eb8f471c7b02 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 5 Jan 2019 00:55:17 -0800 Subject: [PATCH 0231/1072] [SPARK-26541][BUILD] Add `-Pdocker-integration-tests` to `dev/scalastyle` ## What changes were proposed in this pull request? This PR makes `scalastyle` to check `docker-integration-tests` module additionally and fixes one error. ## How was this patch tested? Pass the Jenkins with the updated Scalastyle. ``` ======================================================================== Running Scala style checks ======================================================================== Scalastyle checks passed. ``` Closes #23459 from dongjoon-hyun/SPARK-26541. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/scalastyle | 1 + .../org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/scalastyle b/dev/scalastyle index 2d6ee0da1d4c..ff6dba5b536a 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -28,6 +28,7 @@ ERRORS=$(echo -e "q\n" \ -Phive \ -Phive-thriftserver \ -Pspark-ganglia-lgpl \ + -Pdocker-integration-tests \ scalastyle test:scalastyle \ | awk '{if($1~/error/)print}' \ ) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 70d294d0ca65..79fdf9c2ba43 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -154,7 +154,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLCo // A value with fractions from DECIMAL(3, 2) is correct: assert(row.getDecimal(1).compareTo(BigDecimal.valueOf(1.23)) == 0) // A value > Int.MaxValue from DECIMAL(10) is correct: - assert(row.getDecimal(2).compareTo(BigDecimal.valueOf(9999999999l)) == 0) + assert(row.getDecimal(2).compareTo(BigDecimal.valueOf(9999999999L)) == 0) } From 1af1190beeb1ac15205a9bd06ca67e363de03221 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Sat, 5 Jan 2019 01:14:58 -0800 Subject: [PATCH 0232/1072] [SPARK-26078][SQL][FOLLOWUP] Remove useless import ## What changes were proposed in this pull request? While backporting the patch to 2.4/2.3, I realized that the patch introduces unneeded imports (probably leftovers from intermediate changes). This PR removes the useless import. ## How was this patch tested? NA Closes #23451 from mgaido91/SPARK-26078_FOLLOWUP. Authored-by: Marco Gaido Signed-off-by: Dongjoon Hyun --- sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index c95c52f1d3a9..48c167660913 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -22,7 +22,6 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Sort} import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.types._ class SubquerySuite extends QueryTest with SharedSQLContext { import testImplicits._ From 980e6bcd1c016139c6918d788fb4806a60740fcf Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 5 Jan 2019 21:50:27 +0800 Subject: [PATCH 0233/1072] [SPARK-26246][SQL][FOLLOWUP] Inferring TimestampType from JSON ## What changes were proposed in this pull request? Added new JSON option `inferTimestamp` (`true` by default) to control inferring of `TimestampType` from string values. ## How was this patch tested? Add new UT to `JsonInferSchemaSuite`. Closes #23455 from MaxGekk/json-infer-time-followup. 
Authored-by: Maxim Gekk Signed-off-by: Hyukjin Kwon --- docs/sql-migration-guide-upgrade.md | 3 +++ .../org/apache/spark/sql/catalyst/json/JSONOptions.scala | 6 ++++++ .../apache/spark/sql/catalyst/json/JsonInferSchema.scala | 3 ++- .../spark/sql/catalyst/json/JsonInferSchemaSuite.scala | 6 ++++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index c4d2157de8b6..7e6a0c097d24 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -40,6 +40,9 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, JSON datasource and JSON functions like `from_json` convert a bad JSON record to a row with all `null`s in the PERMISSIVE mode when specified schema is `StructType`. Since Spark 3.0, the returned row can contain non-`null` fields if some of JSON column values were parsed and converted to desired types successfully. - Since Spark 3.0, the `unix_timestamp`, `date_format`, `to_unix_timestamp`, `from_unixtime`, `to_date`, `to_timestamp` functions use java.time API for parsing and formatting dates/timestamps from/to strings by using ISO chronology (https://docs.oracle.com/javase/8/docs/api/java/time/chrono/IsoChronology.html) based on Proleptic Gregorian calendar. In Spark version 2.4 and earlier, java.text.SimpleDateFormat and java.util.GregorianCalendar (hybrid calendar that supports both the Julian and Gregorian calendar systems, see https://docs.oracle.com/javase/7/docs/api/java/util/GregorianCalendar.html) is used for the same purpuse. New implementation supports pattern formats as described here https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html and performs strict checking of its input. For example, the `2015-07-22 10:00:00` timestamp cannot be parse if pattern is `yyyy-MM-dd` because the parser does not consume whole input. Another example is the `31/01/2015 00:00` input cannot be parsed by the `dd/MM/yyyy hh:mm` pattern because `hh` supposes hours in the range `1-12`. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. + + - Since Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they matches to the pattern defined by the JSON option `timestampFormat`. Set JSON option `inferTimestamp` to `false` to disable such type inferring. + ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some change in behavior and are illustrated in the table below. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index eaff3fa7bec2..1ec9d5093a78 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -117,6 +117,12 @@ private[sql] class JSONOptions( */ val pretty: Boolean = parameters.get("pretty").map(_.toBoolean).getOrElse(false) + /** + * Enables inferring of TimestampType from strings matched to the timestamp pattern + * defined by the timestampFormat option. + */ + val inferTimestamp: Boolean = parameters.get("inferTimestamp").map(_.toBoolean).getOrElse(true) + /** Sets config options on a Jackson [[JsonFactory]]. */ def setJacksonOptions(factory: JsonFactory): Unit = { factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index 3203e626ea40..0bf3f03cdb72 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -128,7 +128,8 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { } if (options.prefersDecimal && decimalTry.isDefined) { decimalTry.get - } else if ((allCatch opt timestampFormatter.parse(field)).isDefined) { + } else if (options.inferTimestamp && + (allCatch opt timestampFormatter.parse(field)).isDefined) { TimestampType } else { StringType diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala index 9307f9b47b80..9a6f4f5f9b0c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala @@ -99,4 +99,10 @@ class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper { } } } + + test("disable timestamp inferring") { + val json = """{"a": "2019-01-04T21:11:10.123Z"}""" + checkType(Map("inferTimestamp" -> "true"), json, TimestampType) + checkType(Map("inferTimestamp" -> "false"), json, StringType) + } } From 0037bbb71725619590f5ecbc9a5a470c4889810f Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 5 Jan 2019 22:53:28 +0800 Subject: [PATCH 0234/1072] [MINOR][DOC] Fix typos in the SQL migration guide ## What changes were proposed in this pull request? Fixed a few typos in the migration guide. Closes #23465 from MaxGekk/fix-typos-migration-guide. Authored-by: Maxim Gekk Signed-off-by: Hyukjin Kwon --- docs/sql-migration-guide-upgrade.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 7e6a0c097d24..0fcdd420bcfe 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -17,7 +17,7 @@ displayTitle: Spark SQL Upgrading Guide - Since Spark 3.0, the `from_json` functions supports two modes - `PERMISSIVE` and `FAILFAST`. The modes can be set via the `mode` option. The default mode became `PERMISSIVE`. 
In previous versions, behavior of `from_json` did not conform to either `PERMISSIVE` nor `FAILFAST`, especially in processing of malformed JSON records. For example, the JSON string `{"a" 1}` with the schema `a INT` is converted to `null` by previous versions but Spark 3.0 converts it to `Row(null)`. - - In Spark version 2.4 and earlier, the `from_json` function produces `null`s for JSON strings and JSON datasource skips the same independetly of its mode if there is no valid root JSON token in its input (` ` for example). Since Spark 3.0, such input is treated as a bad record and handled according to specified mode. For example, in the `PERMISSIVE` mode the ` ` input is converted to `Row(null, null)` if specified schema is `key STRING, value INT`. + - In Spark version 2.4 and earlier, the `from_json` function produces `null`s for JSON strings and JSON datasource skips the same independently of its mode if there is no valid root JSON token in its input (` ` for example). Since Spark 3.0, such input is treated as a bad record and handled according to specified mode. For example, in the `PERMISSIVE` mode the ` ` input is converted to `Row(null, null)` if specified schema is `key STRING, value INT`. - The `ADD JAR` command previously returned a result set with the single value 0. It now returns an empty result set. @@ -27,21 +27,21 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, float/double -0.0 is semantically equal to 0.0, but users can still distinguish them via `Dataset.show`, `Dataset.collect` etc. Since Spark 3.0, float/double -0.0 is replaced by 0.0 internally, and users can't distinguish them any more. - - In Spark version 2.4 and earlier, users can create a map with duplicated keys via built-in functions like `CreateMap`, `StringToMap`, etc. The behavior of map with duplicated keys is undefined, e.g. map look up respects the duplicated key appears first, `Dataset.collect` only keeps the duplicated key appears last, `MapKeys` returns duplicated keys, etc. Since Spark 3.0, these built-in functions will remove duplicated map keys with last wins policy. Users may still read map values with duplicated keys from data sources which do not enforce it (e.g. Parquet), the behavior will be udefined. + - In Spark version 2.4 and earlier, users can create a map with duplicated keys via built-in functions like `CreateMap`, `StringToMap`, etc. The behavior of map with duplicated keys is undefined, e.g. map look up respects the duplicated key appears first, `Dataset.collect` only keeps the duplicated key appears last, `MapKeys` returns duplicated keys, etc. Since Spark 3.0, these built-in functions will remove duplicated map keys with last wins policy. Users may still read map values with duplicated keys from data sources which do not enforce it (e.g. Parquet), the behavior will be undefined. - In Spark version 2.4 and earlier, partition column value is converted as null if it can't be casted to corresponding user provided schema. Since 3.0, partition column value is validated with user provided schema. An exception is thrown if the validation fails. You can disable such validation by setting `spark.sql.sources.validatePartitionColumns` to `false`. - In Spark version 2.4 and earlier, the `SET` command works without any warnings even if the specified key is for `SparkConf` entries and it has no effect because the command does not update `SparkConf`, but the behavior might confuse users. Since 3.0, the command fails if a `SparkConf` key is used. 
You can disable such a check by setting `spark.sql.legacy.setCommandRejectsSparkCoreConfs` to `false`. - - Since Spark 3.0, CSV/JSON datasources use java.time API for parsing and generating CSV/JSON content. In Spark version 2.4 and earlier, java.text.SimpleDateFormat is used for the same purpuse with fallbacks to the parsing mechanisms of Spark 2.0 and 1.x. For example, `2018-12-08 10:39:21.123` with the pattern `yyyy-MM-dd'T'HH:mm:ss.SSS` cannot be parsed since Spark 3.0 because the timestamp does not match to the pattern but it can be parsed by earlier Spark versions due to a fallback to `Timestamp.valueOf`. To parse the same timestamp since Spark 3.0, the pattern should be `yyyy-MM-dd HH:mm:ss.SSS`. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. + - Since Spark 3.0, CSV/JSON datasources use java.time API for parsing and generating CSV/JSON content. In Spark version 2.4 and earlier, java.text.SimpleDateFormat is used for the same purpose with fallbacks to the parsing mechanisms of Spark 2.0 and 1.x. For example, `2018-12-08 10:39:21.123` with the pattern `yyyy-MM-dd'T'HH:mm:ss.SSS` cannot be parsed since Spark 3.0 because the timestamp does not match to the pattern but it can be parsed by earlier Spark versions due to a fallback to `Timestamp.valueOf`. To parse the same timestamp since Spark 3.0, the pattern should be `yyyy-MM-dd HH:mm:ss.SSS`. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. - In Spark version 2.4 and earlier, CSV datasource converts a malformed CSV string to a row with all `null`s in the PERMISSIVE mode. Since Spark 3.0, the returned row can contain non-`null` fields if some of CSV column values were parsed and converted to desired types successfully. - In Spark version 2.4 and earlier, JSON datasource and JSON functions like `from_json` convert a bad JSON record to a row with all `null`s in the PERMISSIVE mode when specified schema is `StructType`. Since Spark 3.0, the returned row can contain non-`null` fields if some of JSON column values were parsed and converted to desired types successfully. - - Since Spark 3.0, the `unix_timestamp`, `date_format`, `to_unix_timestamp`, `from_unixtime`, `to_date`, `to_timestamp` functions use java.time API for parsing and formatting dates/timestamps from/to strings by using ISO chronology (https://docs.oracle.com/javase/8/docs/api/java/time/chrono/IsoChronology.html) based on Proleptic Gregorian calendar. In Spark version 2.4 and earlier, java.text.SimpleDateFormat and java.util.GregorianCalendar (hybrid calendar that supports both the Julian and Gregorian calendar systems, see https://docs.oracle.com/javase/7/docs/api/java/util/GregorianCalendar.html) is used for the same purpuse. New implementation supports pattern formats as described here https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html and performs strict checking of its input. For example, the `2015-07-22 10:00:00` timestamp cannot be parse if pattern is `yyyy-MM-dd` because the parser does not consume whole input. Another example is the `31/01/2015 00:00` input cannot be parsed by the `dd/MM/yyyy hh:mm` pattern because `hh` supposes hours in the range `1-12`. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. 
+ - Since Spark 3.0, the `unix_timestamp`, `date_format`, `to_unix_timestamp`, `from_unixtime`, `to_date`, `to_timestamp` functions use java.time API for parsing and formatting dates/timestamps from/to strings by using ISO chronology (https://docs.oracle.com/javase/8/docs/api/java/time/chrono/IsoChronology.html) based on Proleptic Gregorian calendar. In Spark version 2.4 and earlier, java.text.SimpleDateFormat and java.util.GregorianCalendar (hybrid calendar that supports both the Julian and Gregorian calendar systems, see https://docs.oracle.com/javase/7/docs/api/java/util/GregorianCalendar.html) is used for the same purpose. New implementation supports pattern formats as described here https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html and performs strict checking of its input. For example, the `2015-07-22 10:00:00` timestamp cannot be parse if pattern is `yyyy-MM-dd` because the parser does not consume whole input. Another example is the `31/01/2015 00:00` input cannot be parsed by the `dd/MM/yyyy hh:mm` pattern because `hh` supposes hours in the range `1-12`. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`. - - Since Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they matches to the pattern defined by the JSON option `timestampFormat`. Set JSON option `inferTimestamp` to `false` to disable such type inferring. + - Since Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Set JSON option `inferTimestamp` to `false` to disable such type inferring. ## Upgrading From Spark SQL 2.3 to 2.4 From 4ab5b5b9185f60f671d90d94732d0d784afa5f84 Mon Sep 17 00:00:00 2001 From: Kris Mok Date: Sat, 5 Jan 2019 14:37:04 -0800 Subject: [PATCH 0235/1072] [SPARK-26545] Fix typo in EqualNullSafe's truth table comment ## What changes were proposed in this pull request? The truth table comment in EqualNullSafe incorrectly marked FALSE results as UNKNOWN. ## How was this patch tested? N/A Closes #23461 from rednaxelafx/fix-typo. 
Authored-by: Kris Mok Signed-off-by: gatorsmile --- .../apache/spark/sql/catalyst/expressions/predicates.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 01ecb99025ea..37fe22f4556e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -653,9 +653,9 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp // +---------+---------+---------+---------+ // | <=> | TRUE | FALSE | UNKNOWN | // +---------+---------+---------+---------+ - // | TRUE | TRUE | FALSE | UNKNOWN | - // | FALSE | FALSE | TRUE | UNKNOWN | - // | UNKNOWN | UNKNOWN | UNKNOWN | TRUE | + // | TRUE | TRUE | FALSE | FALSE | + // | FALSE | FALSE | TRUE | FALSE | + // | UNKNOWN | FALSE | FALSE | TRUE | // +---------+---------+---------+---------+ override def eval(input: InternalRow): Any = { val input1 = left.eval(input) From a17851cb95687963936c4d4a7eed132ee2c10677 Mon Sep 17 00:00:00 2001 From: Dave DeCaprio Date: Sat, 5 Jan 2019 19:20:35 -0800 Subject: [PATCH 0236/1072] [SPARK-26548][SQL] Don't hold CacheManager write lock while computing executedPlan ## What changes were proposed in this pull request? This addresses SPARK-26548: in Spark 2.4.0, the CacheManager holds a write lock while computing the executedPlan for a cached logicalPlan. In some cases with very large query plans this can be an expensive operation, taking minutes to run. The entire cache is blocked during this time. This PR changes that so the writeLock is only obtained after the executedPlan is generated, which reduces the time the lock is held to just the time when the shared data structure is being updated. gatorsmile and cloud-fan - you have committed patches in this area before. This is a small incremental change. ## How was this patch tested? This has been tested on a live system where the blocking was causing major issues, and it is working well. CacheManager has no explicit unit test but is used in many places internally as part of the SharedState. Closes #23469 from DaveDeCaprio/optimizer-unblocked. 
Lead-authored-by: Dave DeCaprio Co-authored-by: David DeCaprio Signed-off-by: gatorsmile --- .../org/apache/spark/sql/execution/CacheManager.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index c9929935fb8a..728fde54fe69 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -88,7 +88,7 @@ class CacheManager extends Logging { def cacheQuery( query: Dataset[_], tableName: Option[String] = None, - storageLevel: StorageLevel = MEMORY_AND_DISK): Unit = writeLock { + storageLevel: StorageLevel = MEMORY_AND_DISK): Unit = { val planToCache = query.logicalPlan if (lookupCachedData(planToCache).nonEmpty) { logWarning("Asked to cache already cached data.") @@ -100,7 +100,13 @@ class CacheManager extends Logging { sparkSession.sessionState.executePlan(planToCache).executedPlan, tableName, planToCache) - cachedData.add(CachedData(planToCache, inMemoryRelation)) + writeLock { + if (lookupCachedData(planToCache).nonEmpty) { + logWarning("Data has already been cached.") + } else { + cachedData.add(CachedData(planToCache, inMemoryRelation)) + } + } } } From 737f08949adecbae37bb92dfad71ae5f3a82cbee Mon Sep 17 00:00:00 2001 From: SongYadong Date: Sun, 6 Jan 2019 08:46:20 -0600 Subject: [PATCH 0237/1072] [SPARK-26527][CORE] Let acquireUnrollMemory fail fast if required space exceeds memory limit ## What changes were proposed in this pull request? When acquiring unroll memory from `StaticMemoryManager`, let it fail fast if required space exceeds memory limit, just like acquiring storage memory. I think this may reduce some computation and memory evicting costs especially when required space(`numBytes`) is very big. ## How was this patch tested? Existing unit tests. Closes #23426 from SongYadong/acquireUnrollMemory_fail_fast. Authored-by: SongYadong Signed-off-by: Sean Owen --- .../spark/memory/StaticMemoryManager.scala | 27 ++++++++++++------- .../spark/storage/MemoryStoreSuite.scala | 4 +-- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala index 828608704274..0fd349dc5161 100644 --- a/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala +++ b/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala @@ -80,16 +80,23 @@ private[spark] class StaticMemoryManager( memoryMode: MemoryMode): Boolean = synchronized { require(memoryMode != MemoryMode.OFF_HEAP, "StaticMemoryManager does not support off-heap unroll memory") - val currentUnrollMemory = onHeapStorageMemoryPool.memoryStore.currentUnrollMemory - val freeMemory = onHeapStorageMemoryPool.memoryFree - // When unrolling, we will use all of the existing free memory, and, if necessary, - // some extra space freed from evicting cached blocks. We must place a cap on the - // amount of memory to be evicted by unrolling, however, otherwise unrolling one - // big block can blow away the entire cache. 
- val maxNumBytesToFree = math.max(0, maxUnrollMemory - currentUnrollMemory - freeMemory) - // Keep it within the range 0 <= X <= maxNumBytesToFree - val numBytesToFree = math.max(0, math.min(maxNumBytesToFree, numBytes - freeMemory)) - onHeapStorageMemoryPool.acquireMemory(blockId, numBytes, numBytesToFree) + if (numBytes > maxOnHeapStorageMemory) { + // Fail fast if the block simply won't fit + logInfo(s"Will not store $blockId as the required space ($numBytes bytes) exceeds our " + + s"memory limit ($maxOnHeapStorageMemory bytes)") + false + } else { + val currentUnrollMemory = onHeapStorageMemoryPool.memoryStore.currentUnrollMemory + val freeMemory = onHeapStorageMemoryPool.memoryFree + // When unrolling, we will use all of the existing free memory, and, if necessary, + // some extra space freed from evicting cached blocks. We must place a cap on the + // amount of memory to be evicted by unrolling, however, otherwise unrolling one + // big block can blow away the entire cache. + val maxNumBytesToFree = math.max(0, maxUnrollMemory - currentUnrollMemory - freeMemory) + // Keep it within the range 0 <= X <= maxNumBytesToFree + val numBytesToFree = math.max(0, math.min(maxNumBytesToFree, numBytes - freeMemory)) + onHeapStorageMemoryPool.acquireMemory(blockId, numBytes, numBytesToFree) + } } private[memory] diff --git a/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala b/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala index 7274072e5049..baff672f5fb8 100644 --- a/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala @@ -291,11 +291,11 @@ class MemoryStoreSuite blockInfoManager.removeBlock("b3") putIteratorAsBytes("b3", smallIterator, ClassTag.Any) - // Unroll huge block with not enough space. This should fail and kick out b2 in the process. + // Unroll huge block with not enough space. This should fail. val result4 = putIteratorAsBytes("b4", bigIterator, ClassTag.Any) assert(result4.isLeft) // unroll was unsuccessful assert(!memoryStore.contains("b1")) - assert(!memoryStore.contains("b2")) + assert(memoryStore.contains("b2")) assert(memoryStore.contains("b3")) assert(!memoryStore.contains("b4")) assert(memoryStore.currentUnrollMemoryForThisTask > 0) // we returned an iterator From 9d8e9b394bbc065a72076585a21393f42ce86cd1 Mon Sep 17 00:00:00 2001 From: Hirobe Keiichi Date: Sun, 6 Jan 2019 08:52:09 -0600 Subject: [PATCH 0238/1072] [SPARK-26339][SQL] Throws better exception when reading files that start with underscore ## What changes were proposed in this pull request? My pull request #23288 was resolved and merged to master, but it turned out later that my change broke another regression test. Because we cannot reopen a pull request, I have created a new pull request here. Commit 92934b4 is the only change after pull request #23288. `CheckFileExist` was avoided at 239cfa4 after discussing #23288 (comment). But that change turned out to be wrong because we should not check when the argument checkFileExist is false. Test https://github.com/apache/spark/blob/27e42c1de502da80fa3e22bb69de47fb00158174/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala#L2555 failed when we avoided checkFileExist, but now succeeds after commit 92934b4. ## How was this patch tested? Both of the tests below passed. ``` testOnly org.apache.spark.sql.execution.datasources.csv.CSVSuite testOnly org.apache.spark.sql.SQLQuerySuite ``` Closes #23446 from KeiichiHirobe/SPARK-26339. 
Authored-by: Hirobe Keiichi Signed-off-by: Sean Owen --- .../execution/datasources/DataSource.scala | 19 +++++++++++++++++- .../src/test/resources/test-data/_cars.csv | 7 +++++++ .../execution/datasources/csv/CSVSuite.scala | 20 +++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/test/resources/test-data/_cars.csv diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index fefff68c4ba8..2a438a5cbf95 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -543,7 +543,7 @@ case class DataSource( checkFilesExist: Boolean): Seq[Path] = { val allPaths = caseInsensitiveOptions.get("path") ++ paths val hadoopConf = sparkSession.sessionState.newHadoopConf() - allPaths.flatMap { path => + val allGlobPath = allPaths.flatMap { path => val hdfsPath = new Path(path) val fs = hdfsPath.getFileSystem(hadoopConf) val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory) @@ -560,6 +560,23 @@ case class DataSource( } globPath }.toSeq + + if (checkFilesExist) { + val (filteredOut, filteredIn) = allGlobPath.partition { path => + InMemoryFileIndex.shouldFilterOut(path.getName) + } + if (filteredOut.nonEmpty) { + if (filteredIn.isEmpty) { + throw new AnalysisException( + s"All paths were ignored:\n${filteredOut.mkString("\n ")}") + } else { + logDebug( + s"Some paths were ignored:\n${filteredOut.mkString("\n ")}") + } + } + } + + allGlobPath } } diff --git a/sql/core/src/test/resources/test-data/_cars.csv b/sql/core/src/test/resources/test-data/_cars.csv new file mode 100644 index 000000000000..40ded573ade5 --- /dev/null +++ b/sql/core/src/test/resources/test-data/_cars.csv @@ -0,0 +1,7 @@ + +year,make,model,comment,blank +"2012","Tesla","S","No comment", + +1997,Ford,E350,"Go get one now they are going fast", +2015,Chevy,Volt + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index d9e5d7af1967..fb1bedfaa32c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -53,6 +53,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te private val carsEmptyValueFile = "test-data/cars-empty-value.csv" private val carsBlankColName = "test-data/cars-blank-column-name.csv" private val carsCrlf = "test-data/cars-crlf.csv" + private val carsFilteredOutFile = "test-data/_cars.csv" private val emptyFile = "test-data/empty.csv" private val commentsFile = "test-data/comments.csv" private val disableCommentsFile = "test-data/disable_comments.csv" @@ -346,6 +347,25 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te assert(result.schema.fieldNames.size === 1) } + test("SPARK-26339 Not throw an exception if some of specified paths are filtered in") { + val cars = spark + .read + .option("header", "false") + .csv(testFile(carsFile), testFile(carsFilteredOutFile)) + + verifyCars(cars, withHeader = false, checkTypes = false) + } + + test("SPARK-26339 Throw an exception only if all of the specified paths are filtered out") { + val e = intercept[AnalysisException] { + val cars = 
spark + .read + .option("header", "false") + .csv(testFile(carsFilteredOutFile)) + }.getMessage + assert(e.contains("All paths were ignored:")) + } + test("DDL test with empty file") { withView("carsTable") { spark.sql( From b305d71625380f6fcd7b675d423222eca1840c2a Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 6 Jan 2019 17:36:06 -0800 Subject: [PATCH 0239/1072] [SPARK-26547][SQL] Remove duplicate toHiveString from HiveUtils ## What changes were proposed in this pull request? The `toHiveString()` and `toHiveStructString` methods were removed from `HiveUtils` because they have been already implemented in `HiveResult`. One related test was moved to `HiveResultSuite`. ## How was this patch tested? By tests from `hive-thriftserver`. Closes #23466 from MaxGekk/dedup-hive-result-string. Authored-by: Maxim Gekk Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/HiveResult.scala | 115 ++++++++++-------- .../spark/sql/execution/HiveResultSuite.scala | 30 +++++ .../SparkExecuteStatementOperation.scala | 4 +- .../org/apache/spark/sql/hive/HiveUtils.scala | 46 ------- .../spark/sql/hive/HiveUtilsSuite.scala | 11 +- 5 files changed, 96 insertions(+), 110 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 22d3ca958a21..c90b254a6d12 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -56,61 +56,70 @@ object HiveResult { result.map(_.zip(types).map(toHiveString)).map(_.mkString("\t")) } - /** Formats a datum (based on the given data type) and returns the string representation. */ - private def toHiveString(a: (Any, DataType)): String = { - val primitiveTypes = Seq(StringType, IntegerType, LongType, DoubleType, FloatType, - BooleanType, ByteType, ShortType, DateType, TimestampType, BinaryType) - val timeZone = DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone) - - def formatDecimal(d: java.math.BigDecimal): String = { - if (d.compareTo(java.math.BigDecimal.ZERO) == 0) { - java.math.BigDecimal.ZERO.toPlainString - } else { - d.stripTrailingZeros().toPlainString - } + private def formatDecimal(d: java.math.BigDecimal): String = { + if (d.compareTo(java.math.BigDecimal.ZERO) == 0) { + java.math.BigDecimal.ZERO.toPlainString + } else { + d.stripTrailingZeros().toPlainString // Hive strips trailing zeros } + } - /** Hive outputs fields of structs slightly differently than top level attributes. 
*/ - def toHiveStructString(a: (Any, DataType)): String = a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "null" - case (s: String, StringType) => "\"" + s + "\"" - case (decimal, DecimalType()) => decimal.toString - case (interval, CalendarIntervalType) => interval.toString - case (other, tpe) if primitiveTypes contains tpe => other.toString - } + private val primitiveTypes = Seq( + StringType, + IntegerType, + LongType, + DoubleType, + FloatType, + BooleanType, + ByteType, + ShortType, + DateType, + TimestampType, + BinaryType) - a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "NULL" - case (d: Date, DateType) => - DateTimeUtils.dateToString(DateTimeUtils.fromJavaDate(d)) - case (t: Timestamp, TimestampType) => - DateTimeUtils.timestampToString(DateTimeUtils.fromJavaTimestamp(t), timeZone) - case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) - case (decimal: java.math.BigDecimal, DecimalType()) => formatDecimal(decimal) - case (interval, CalendarIntervalType) => interval.toString - case (other, tpe) if primitiveTypes.contains(tpe) => other.toString - } + /** Hive outputs fields of structs slightly differently than top level attributes. */ + private def toHiveStructString(a: (Any, DataType)): String = a match { + case (struct: Row, StructType(fields)) => + struct.toSeq.zip(fields).map { + case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" + }.mkString("{", ",", "}") + case (seq: Seq[_], ArrayType(typ, _)) => + seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") + case (map: Map[_, _], MapType(kType, vType, _)) => + map.map { + case (key, value) => + toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) + }.toSeq.sorted.mkString("{", ",", "}") + case (null, _) => "null" + case (s: String, StringType) => "\"" + s + "\"" + case (decimal, DecimalType()) => decimal.toString + case (interval, CalendarIntervalType) => interval.toString + case (other, tpe) if primitiveTypes contains tpe => other.toString + } + + /** Formats a datum (based on the given data type) and returns the string representation. 
*/ + def toHiveString(a: (Any, DataType)): String = a match { + case (struct: Row, StructType(fields)) => + struct.toSeq.zip(fields).map { + case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" + }.mkString("{", ",", "}") + case (seq: Seq[_], ArrayType(typ, _)) => + seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") + case (map: Map[_, _], MapType(kType, vType, _)) => + map.map { + case (key, value) => + toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) + }.toSeq.sorted.mkString("{", ",", "}") + case (null, _) => "NULL" + case (d: Date, DateType) => + DateTimeUtils.dateToString(DateTimeUtils.fromJavaDate(d)) + case (t: Timestamp, TimestampType) => + val timeZone = DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone) + DateTimeUtils.timestampToString(DateTimeUtils.fromJavaTimestamp(t), timeZone) + case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) + case (decimal: java.math.BigDecimal, DecimalType()) => formatDecimal(decimal) + case (interval, CalendarIntervalType) => interval.toString + case (other, _ : UserDefinedType[_]) => other.toString + case (other, tpe) if primitiveTypes.contains(tpe) => other.toString } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala new file mode 100644 index 000000000000..4205b3f79a97 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT} + +class HiveResultSuite extends SparkFunSuite { + + test("toHiveString correctly handles UDTs") { + val point = new ExamplePoint(50.0, 50.0) + val tpe = new ExamplePointUDT() + assert(HiveResult.toHiveString((point, tpe)) === "(50.0, 50.0)") + } +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 3cfc81b8a957..e68c6011c139 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -34,8 +34,8 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.{DataFrame, Row => SparkRow, SQLContext} +import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.execution.command.SetCommand -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.{Utils => SparkUtils} @@ -103,7 +103,7 @@ private[hive] class SparkExecuteStatementOperation( case BinaryType => to += from.getAs[Array[Byte]](ordinal) case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] => - val hiveString = HiveUtils.toHiveString((from.get(ordinal), dataTypes(ordinal))) + val hiveString = HiveResult.toHiveString((from.get(ordinal), dataTypes(ordinal))) to += hiveString } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index b60d4c71f594..597eef129f63 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -444,52 +444,6 @@ private[spark] object HiveUtils extends Logging { propMap.toMap } - protected val primitiveTypes = - Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType, - ShortType, DateType, TimestampType, BinaryType) - - protected[sql] def toHiveString(a: (Any, DataType)): String = a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "NULL" - case (d: Int, DateType) => new DateWritable(d).toString - case (t: Timestamp, TimestampType) => new TimestampWritable(t).toString - case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) - case (decimal: java.math.BigDecimal, DecimalType()) => - // Hive strips trailing zeros so use its toString - HiveDecimal.create(decimal).toString - case (other, _ : UserDefinedType[_]) => other.toString - case (other, tpe) if primitiveTypes contains tpe => other.toString - } - - /** Hive outputs fields of structs slightly differently than top level attributes. 
*/ - protected def toHiveStructString(a: (Any, DataType)): String = a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "null" - case (s: String, StringType) => "\"" + s + "\"" - case (decimal, DecimalType()) => decimal.toString - case (other, tpe) if primitiveTypes contains tpe => other.toString - } - /** * Infers the schema for Hive serde tables and returns the CatalogTable with the inferred schema. * When the tables are data source tables or the schema already exists, returns the original diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index f2b75e4b23f0..303dd70760a1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -17,16 +17,15 @@ package org.apache.spark.sql.hive -import java.net.URL - import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SQLTestUtils} -import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader} +import org.apache.spark.util.ChildFirstURLClassLoader class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { @@ -62,10 +61,4 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton Thread.currentThread().setContextClassLoader(contextClassLoader) } } - - test("toHiveString correctly handles UDTs") { - val point = new ExamplePoint(50.0, 50.0) - val tpe = new ExamplePointUDT() - assert(HiveUtils.toHiveString((point, tpe)) === "(50.0, 50.0)") - } } From fe039faddf13c6a30f7aea69324aa4d4bb84c632 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 6 Jan 2019 19:59:31 -0800 Subject: [PATCH 0240/1072] [SPARK-26554][BUILD] Update `release-util.sh` to avoid GitBox fake 200 headers ## What changes were proposed in this pull request? Unlike the previous Apache Git repository, new GitBox repository returns a fake HTTP 200 header instead of `404 Not Found` header. This makes release scripts out of order. This PR aims to fix it to handle the html body message instead of the fake HTTP headers. This is a release blocker. ```bash $ curl -s --head --fail "https://gitbox.apache.org/repos/asf?p=spark.git;a=commit;h=v3.0.0" HTTP/1.1 200 OK Date: Sun, 06 Jan 2019 22:42:39 GMT Server: Apache/2.4.18 (Ubuntu) Vary: Accept-Encoding Access-Control-Allow-Origin: * Access-Control-Allow-Methods: POST, GET, OPTIONS Access-Control-Allow-Headers: X-PINGOTHER Access-Control-Max-Age: 1728000 Content-Type: text/html; charset=utf-8 ``` **BEFORE** ```bash $ ./do-release-docker.sh -d /tmp/test -n Branch [branch-2.4]: Current branch version is 2.4.1-SNAPSHOT. Release [2.4.1]: RC # [1]: v2.4.1-rc1 already exists. Continue anyway [y/n]? 
``` **AFTER** ```bash $ ./do-release-docker.sh -d /tmp/test -n Branch [branch-2.4]: Current branch version is 2.4.1-SNAPSHOT. Release [2.4.1]: RC # [1]: This is a dry run. Please confirm the ref that will be built for testing. Ref [v2.4.1-rc1]: ``` ## How was this patch tested? Manual. Closes #23476 from dongjoon-hyun/SPARK-26554. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/create-release/release-util.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev/create-release/release-util.sh b/dev/create-release/release-util.sh index c925de9be52d..9a340528b506 100755 --- a/dev/create-release/release-util.sh +++ b/dev/create-release/release-util.sh @@ -73,7 +73,9 @@ function fcreate_secure { } function check_for_tag { - curl -s --head --fail "$ASF_REPO_WEBUI;a=commit;h=$1" >/dev/null + # Check HTML body messages instead of header status codes. Apache GitBox returns + # a header with `200 OK` status code for both existing and non-existing tag URLs + ! curl -s --fail "$ASF_REPO_WEBUI;a=commit;h=$1" | grep '404 Not Found' > /dev/null } function get_release_info { From 61133cb8a69e7814c3450e84ce9cc9226d7e8ad8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 6 Jan 2019 21:00:10 -0800 Subject: [PATCH 0241/1072] [SPARK-26536][BUILD][FOLLOWUP][TEST-MAVEN] Make StreamingReadSupport public for maven testing ## What changes were proposed in this pull request? `StreamingReadSupport` is designed to be a `package` interface. Mockito seems to complain during `Maven` testing. This doesn't fail in `sbt` and IntelliJ. For mock-testing purpose, this PR makes it `public` interface and adds explicit comments like `public interface ReadSupport` ```scala EpochCoordinatorSuite: *** RUN ABORTED *** java.lang.IllegalAccessError: tried to access class org.apache.spark.sql.sources.v2.reader.streaming.StreamingReadSupport from class org.apache.spark.sql.sources.v2.reader.streaming.ContinuousReadSupport$MockitoMock$58628338 at org.apache.spark.sql.sources.v2.reader.streaming.ContinuousReadSupport$MockitoMock$58628338.(Unknown Source) at sun.reflect.GeneratedSerializationConstructorAccessor632.newInstance(Unknown Source) at java.lang.reflect.Constructor.newInstance(Constructor.java:423) at org.objenesis.instantiator.sun.SunReflectionFactoryInstantiator.newInstance(SunReflectionFactoryInstantiator.java:48) at org.objenesis.ObjenesisBase.newInstance(ObjenesisBase.java:73) at org.mockito.internal.creation.instance.ObjenesisInstantiator.newInstance(ObjenesisInstantiator.java:19) at org.mockito.internal.creation.bytebuddy.SubclassByteBuddyMockMaker.createMock(SubclassByteBuddyMockMaker.java:47) at org.mockito.internal.creation.bytebuddy.ByteBuddyMockMaker.createMock(ByteBuddyMockMaker.java:25) at org.mockito.internal.util.MockUtil.createMock(MockUtil.java:35) at org.mockito.internal.MockitoCore.mock(MockitoCore.java:69) ``` ## How was this patch tested? Pass the Jenkins with Maven build Closes #23463 from dongjoon-hyun/SPARK-26536-2. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../v2/reader/streaming/StreamingReadSupport.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/StreamingReadSupport.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/StreamingReadSupport.java index 84872d1ebc26..bd39fc858d3b 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/StreamingReadSupport.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/StreamingReadSupport.java @@ -17,14 +17,17 @@ package org.apache.spark.sql.sources.v2.reader.streaming; +import com.google.common.annotations.VisibleForTesting; + import org.apache.spark.sql.sources.v2.reader.ReadSupport; /** - * A base interface for streaming read support. This is package private and is invisible to data - * sources. Data sources should implement concrete streaming read support interfaces: - * {@link MicroBatchReadSupport} or {@link ContinuousReadSupport}. + * A base interface for streaming read support. Data sources should implement concrete streaming + * read support interfaces: {@link MicroBatchReadSupport} or {@link ContinuousReadSupport}. + * This is exposed for a testing purpose. */ -interface StreamingReadSupport extends ReadSupport { +@VisibleForTesting +public interface StreamingReadSupport extends ReadSupport { /** * Returns the initial offset for a streaming query to start reading from. Note that the From 468d25ec7419b4c55955ead877232aae5654260e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 6 Jan 2019 22:45:18 -0800 Subject: [PATCH 0242/1072] [MINOR][BUILD] Fix script name in `release-tag.sh` usage message ## What changes were proposed in this pull request? This PR fixes the old script name in `release-tag.sh`. $ ./release-tag.sh --help | head -n1 usage: tag-release.sh ## How was this patch tested? Manual. $ ./release-tag.sh --help | head -n1 usage: release-tag.sh Closes #23477 from dongjoon-hyun/SPARK-RELEASE-TAG. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/create-release/release-tag.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/create-release/release-tag.sh b/dev/create-release/release-tag.sh index 010082d960a2..8024440759eb 100755 --- a/dev/create-release/release-tag.sh +++ b/dev/create-release/release-tag.sh @@ -21,8 +21,9 @@ SELF=$(cd $(dirname $0) && pwd) . "$SELF/release-util.sh" function exit_with_usage { + local NAME=$(basename $0) cat << EOF -usage: tag-release.sh +usage: $NAME Tags a Spark release on a particular branch. Inputs are specified with the following environment variables: From a927c764c1eee066efc1c2c713dfee411de79245 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 7 Jan 2019 18:36:52 +0800 Subject: [PATCH 0243/1072] [SPARK-26559][ML][PYSPARK] ML image can't work with numpy versions prior to 1.9 ## What changes were proposed in this pull request? Due to [API change](https://github.com/numpy/numpy/pull/4257/files#diff-c39521d89f7e61d6c0c445d93b62f7dc) at 1.9, PySpark image doesn't work with numpy version prior to 1.9. When running image test with numpy version prior to 1.9, we can see error: ``` test_read_images (pyspark.ml.tests.test_image.ImageReaderTest) ... ERROR test_read_images_multiple_times (pyspark.ml.tests.test_image.ImageReaderTest2) ... 
ok ====================================================================== ERROR: test_read_images (pyspark.ml.tests.test_image.ImageReaderTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "/Users/viirya/docker_tmp/repos/spark-1/python/pyspark/ml/tests/test_image.py", line 36, in test_read_images self.assertEqual(ImageSchema.toImage(array, origin=first_row[0]), first_row) File "/Users/viirya/docker_tmp/repos/spark-1/python/pyspark/ml/image.py", line 193, in toImage data = bytearray(array.astype(dtype=np.uint8).ravel().tobytes()) AttributeError: 'numpy.ndarray' object has no attribute 'tobytes' ---------------------------------------------------------------------- Ran 2 tests in 29.040s FAILED (errors=1) ``` ## How was this patch tested? Manually test with numpy version prior and after 1.9. Closes #23484 from viirya/fix-pyspark-image. Authored-by: Liang-Chi Hsieh Signed-off-by: Hyukjin Kwon --- python/pyspark/ml/image.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/pyspark/ml/image.py b/python/pyspark/ml/image.py index edb90a357854..a1aacea88e42 100644 --- a/python/pyspark/ml/image.py +++ b/python/pyspark/ml/image.py @@ -28,6 +28,7 @@ import warnings import numpy as np +from distutils.version import LooseVersion from pyspark import SparkContext from pyspark.sql.types import Row, _create_row, _parse_datatype_json_string @@ -190,7 +191,11 @@ def toImage(self, array, origin=""): # Running `bytearray(numpy.array([1]))` fails in specific Python versions # with a specific Numpy version, for example in Python 3.6.0 and NumPy 1.13.3. # Here, it avoids it by converting it to bytes. - data = bytearray(array.astype(dtype=np.uint8).ravel().tobytes()) + if LooseVersion(np.__version__) >= LooseVersion('1.9'): + data = bytearray(array.astype(dtype=np.uint8).ravel().tobytes()) + else: + # Numpy prior to 1.9 don't have `tobytes` method. + data = bytearray(array.astype(dtype=np.uint8).ravel()) # Creating new Row with _create_row(), because Row(name = value, ... ) # orders fields by name, which conflicts with expected schema order From 868e02533d76d45bff4200d07658105b6004cf46 Mon Sep 17 00:00:00 2001 From: ayudovin Date: Mon, 7 Jan 2019 08:58:33 -0600 Subject: [PATCH 0244/1072] [SPARK-26383][CORE] NPE when use DataFrameReader.jdbc with wrong URL ### What changes were proposed in this pull request? When passing wrong url to jdbc then It would throw IllegalArgumentException instead of NPE. ### How was this patch tested? Adding test case to Existing tests in JDBCSuite Closes #23464 from ayudovin/fixing-npe. Authored-by: ayudovin Signed-off-by: Sean Owen --- .../sql/execution/datasources/jdbc/JdbcUtils.scala | 7 ++++++- .../scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 13 +++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 922bef284c98..86a27b5afc25 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -48,6 +48,7 @@ object JdbcUtils extends Logging { * Returns a factory for creating connections to the given JDBC URL. * * @param options - JDBC options that contains url, table and other information. 
+ * @throws IllegalArgumentException if the driver could not open a JDBC connection. */ def createConnectionFactory(options: JDBCOptions): () => Connection = { val driverClass: String = options.driverClass @@ -60,7 +61,11 @@ object JdbcUtils extends Logging { throw new IllegalStateException( s"Did not find registered driver with class $driverClass") } - driver.connect(options.url, options.asConnectionProperties) + val connection: Connection = driver.connect(options.url, options.asConnectionProperties) + require(connection != null, + s"The driver could not open a JDBC connection. Check the URL: ${options.url}") + + connection } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index e4641631e607..aefa5da94481 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -1507,4 +1507,17 @@ class JDBCSuite extends QueryTest checkNotPushdown(sql("SELECT name, theid FROM predicateOption WHERE theid = 1")), Row("fred", 1) :: Nil) } + + test("SPARK-26383 throw IllegalArgumentException if wrong kind of driver to the given url") { + val e = intercept[IllegalArgumentException] { + val opts = Map( + "url" -> "jdbc:mysql://localhost/db", + "dbtable" -> "table", + "driver" -> "org.postgresql.Driver" + ) + spark.read.format("jdbc").options(opts).load + }.getMessage + assert(e.contains("The driver could not open a JDBC connection. " + + "Check the URL: jdbc:mysql://localhost/db")) + } } From 71183b283343a99c6fa99a41268dae412598067f Mon Sep 17 00:00:00 2001 From: Shahid Date: Mon, 7 Jan 2019 09:15:50 -0800 Subject: [PATCH 0245/1072] [SPARK-24489][ML] Check for invalid input type of weight data in ml.PowerIterationClustering ## What changes were proposed in this pull request? The test case will result the following failure. currently in ml.PIC, there is no check for the data type of weight column. ``` test("invalid input types for weight") { val invalidWeightData = spark.createDataFrame(Seq( (0L, 1L, "a"), (2L, 3L, "b") )).toDF("src", "dst", "weight") val pic = new PowerIterationClustering() .setWeightCol("weight") val result = pic.assignClusters(invalidWeightData) } ``` ``` Job aborted due to stage failure: Task 0 in stage 8077.0 failed 1 times, most recent failure: Lost task 0.0 in stage 8077.0 (TID 882, localhost, executor driver): scala.MatchError: [0,1,null] (of class org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema) at org.apache.spark.ml.clustering.PowerIterationClustering$$anonfun$3.apply(PowerIterationClustering.scala:178) at org.apache.spark.ml.clustering.PowerIterationClustering$$anonfun$3.apply(PowerIterationClustering.scala:178) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434) at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at org.apache.spark.graphx.EdgeRDD$$anonfun$1.apply(EdgeRDD.scala:107) at org.apache.spark.graphx.EdgeRDD$$anonfun$1.apply(EdgeRDD.scala:105) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$26.apply(RDD.scala:847) ``` In this PR, added check types for weight column. ## How was this patch tested? UT added Please review http://spark.apache.org/contributing.html before opening a pull request. 
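As a rough illustration (not part of the patch itself), the same fail-fast validation can be sketched with only public Spark SQL APIs; the actual fix calls the internal `SchemaUtils.checkNumericType` helper instead, and the helper and names below are illustrative only.

```scala
import org.apache.spark.sql.{Column, DataFrame}
import org.apache.spark.sql.functions.{col, lit}
import org.apache.spark.sql.types.NumericType

// Hypothetical helper: resolve the weight column as DoubleType, failing fast with an
// IllegalArgumentException when the column exists but is not numeric, instead of
// hitting a MatchError deep inside graph construction.
def resolveWeightCol(df: DataFrame, weightCol: Option[String]): Column = weightCol match {
  case Some(name) if name.nonEmpty =>
    val dt = df.schema(name).dataType
    require(dt.isInstanceOf[NumericType],
      s"Column $name must be of type numeric but was actually of type ${dt.catalogString}.")
    col(name).cast("double")
  case _ =>
    lit(1.0) // default weight when no weight column is configured
}
```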
Closes #21509 from shahidki31/testCasePic. Authored-by: Shahid Signed-off-by: Holden Karau --- .../ml/clustering/PowerIterationClustering.scala | 1 + .../PowerIterationClusteringSuite.scala | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala index d9a330f67e8d..149e99d2f195 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala @@ -166,6 +166,7 @@ class PowerIterationClustering private[clustering] ( val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) { lit(1.0) } else { + SchemaUtils.checkNumericType(dataset.schema, $(weightCol)) col($(weightCol)).cast(DoubleType) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala index 55b460f1a452..0ba3ffabb75d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala @@ -145,6 +145,21 @@ class PowerIterationClusteringSuite extends SparkFunSuite assert(msg.contains("Similarity must be nonnegative")) } + test("check for invalid input types of weight") { + val invalidWeightData = spark.createDataFrame(Seq( + (0L, 1L, "a"), + (2L, 3L, "b") + )).toDF("src", "dst", "weight") + + val msg = intercept[IllegalArgumentException] { + new PowerIterationClustering() + .setWeightCol("weight") + .assignClusters(invalidWeightData) + }.getMessage + assert(msg.contains("requirement failed: Column weight must be of type numeric" + + " but was actually of type string.")) + } + test("test default weight") { val dataWithoutWeight = data.sample(0.5, 1L).select('src, 'dst) From 669e8a155987995a1a5d49a96b88c05f39e41723 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Mon, 7 Jan 2019 14:40:08 -0600 Subject: [PATCH 0246/1072] [SPARK-25689][YARN] Make driver, not AM, manage delegation tokens. This change modifies the behavior of the delegation token code when running on YARN, so that the driver controls the renewal, in both client and cluster mode. For that, a few different things were changed: * The AM code only runs code that needs DTs when DTs are available. In a way, this restores the AM behavior to what it was pre-SPARK-23361, but keeping the fix added in that bug. Basically, all the AM code is run in a "UGI.doAs()" block; but code that needs to talk to HDFS (basically the distributed cache handling code) was delayed to the point where the driver is up and running, and thus when valid delegation tokens are available. * SparkSubmit / ApplicationMaster now handle user login, not the token manager. The previous AM code was relying on the token manager to keep the user logged in when keytabs are used. This required some odd APIs in the token manager and the AM so that the right UGI was exposed and used in the right places. After this change, the logged in user is handled separately from the token manager, so the API was cleaned up, and, as explained above, the whole AM runs under the logged in user, which also helps with simplifying some more code. * Distributed cache configs are sent separately to the AM. 
Because of the delayed initialization of the cached resources in the AM, it became easier to write the cache config to a separate properties file instead of bundling it with the rest of the Spark config. This also avoids having to modify the SparkConf to hide things from the UI. * Finally, the AM doesn't manage the token manager anymore. The above changes allow the token manager to be completely handled by the driver's scheduler backend code also in YARN mode (whether client or cluster), making it similar to other RMs. To maintain the fix added in SPARK-23361 also in client mode, the AM now sends an extra message to the driver on initialization to fetch delegation tokens; and although it might not really be needed, the driver also keeps the running AM updated when new tokens are created. Tested in a kerberized cluster with the same tests used to validate SPARK-23361, in both client and cluster mode. Also tested with a non-kerberized cluster. Closes #23338 from vanzin/SPARK-25689. Authored-by: Marcelo Vanzin Signed-off-by: Imran Rashid --- .../HadoopDelegationTokenManager.scala | 110 ++++++-------- .../HiveDelegationTokenProvider.scala | 16 ++- .../cluster/CoarseGrainedClusterMessage.scala | 3 + .../CoarseGrainedSchedulerBackend.scala | 40 ++++-- .../HadoopDelegationTokenManagerSuite.scala | 8 +- .../KerberosConfDriverFeatureStep.scala | 2 +- .../KubernetesClusterSchedulerBackend.scala | 7 +- .../MesosCoarseGrainedSchedulerBackend.scala | 7 +- .../spark/deploy/yarn/ApplicationMaster.scala | 135 +++++++++--------- .../yarn/ApplicationMasterArguments.scala | 5 + .../org/apache/spark/deploy/yarn/Client.scala | 100 +++++++------ .../spark/deploy/yarn/YarnRMClient.scala | 8 +- .../org/apache/spark/deploy/yarn/config.scala | 10 -- .../YARNHadoopDelegationTokenManager.scala | 7 +- .../cluster/YarnClientSchedulerBackend.scala | 6 + .../cluster/YarnSchedulerBackend.scala | 17 ++- ...ARNHadoopDelegationTokenManagerSuite.scala | 2 +- 17 files changed, 246 insertions(+), 237 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala index f7e3ddecee09..d97857a39fc2 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala @@ -21,7 +21,6 @@ import java.io.File import java.net.URI import java.security.PrivilegedExceptionAction import java.util.concurrent.{ScheduledExecutorService, TimeUnit} -import java.util.concurrent.atomic.AtomicReference import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.FileSystem @@ -39,32 +38,24 @@ import org.apache.spark.util.ThreadUtils /** * Manager for delegation tokens in a Spark application. * - * This manager has two modes of operation: - * - * 1. When configured with a principal and a keytab, it will make sure long-running apps can run - * without interruption while accessing secured services. It periodically logs in to the KDC with - * user-provided credentials, and contacts all the configured secure services to obtain delegation - * tokens to be distributed to the rest of the application. - * - * Because the Hadoop UGI API does not expose the TTL of the TGT, a configuration controls how often - * to check that a relogin is necessary. This is done reasonably often since the check is a no-op - * when the relogin is not yet needed. 
The check period can be overridden in the configuration. + * When configured with a principal and a keytab, this manager will make sure long-running apps can + * run without interruption while accessing secured services. It periodically logs in to the KDC + * with user-provided credentials, and contacts all the configured secure services to obtain + * delegation tokens to be distributed to the rest of the application. * * New delegation tokens are created once 75% of the renewal interval of the original tokens has - * elapsed. The new tokens are sent to the Spark driver endpoint once it's registered with the AM. - * The driver is tasked with distributing the tokens to other processes that might need them. + * elapsed. The new tokens are sent to the Spark driver endpoint. The driver is tasked with + * distributing the tokens to other processes that might need them. * - * 2. When operating without an explicit principal and keytab, token renewal will not be available. - * Starting the manager will distribute an initial set of delegation tokens to the provided Spark - * driver, but the app will not get new tokens when those expire. - * - * It can also be used just to create delegation tokens, by calling the `obtainDelegationTokens` - * method. This option does not require calling the `start` method, but leaves it up to the - * caller to distribute the tokens that were generated. + * This class can also be used just to create delegation tokens, by calling the + * `obtainDelegationTokens` method. This option does not require calling the `start` method nor + * providing a driver reference, but leaves it up to the caller to distribute the tokens that were + * generated. */ private[spark] class HadoopDelegationTokenManager( protected val sparkConf: SparkConf, - protected val hadoopConf: Configuration) extends Logging { + protected val hadoopConf: Configuration, + protected val schedulerRef: RpcEndpointRef) extends Logging { private val deprecatedProviderEnabledConfigs = List( "spark.yarn.security.tokens.%s.enabled", @@ -85,60 +76,44 @@ private[spark] class HadoopDelegationTokenManager( s"${delegationTokenProviders.keys.mkString(", ")}.") private var renewalExecutor: ScheduledExecutorService = _ - private val driverRef = new AtomicReference[RpcEndpointRef]() - - /** Set the endpoint used to send tokens to the driver. */ - def setDriverRef(ref: RpcEndpointRef): Unit = { - driverRef.set(ref) - } /** @return Whether delegation token renewal is enabled. */ def renewalEnabled: Boolean = principal != null /** - * Start the token renewer. Requires a principal and keytab. Upon start, the renewer will: + * Start the token renewer. Requires a principal and keytab. Upon start, the renewer will + * obtain delegation tokens for all configured services and send them to the driver, and + * set up tasks to periodically get fresh tokens as needed. * - * - log in the configured principal, and set up a task to keep that user's ticket renewed - * - obtain delegation tokens from all available providers - * - send the tokens to the driver, if it's already registered - * - schedule a periodic task to update the tokens when needed. + * This method requires that a keytab has been provided to Spark, and will try to keep the + * logged in user's TGT valid while this manager is active. * - * @return The newly logged in user. + * @return New set of delegation tokens created for the configured principal. 
*/ - def start(): UserGroupInformation = { + def start(): Array[Byte] = { require(renewalEnabled, "Token renewal must be enabled to start the renewer.") + require(schedulerRef != null, "Token renewal requires a scheduler endpoint.") renewalExecutor = ThreadUtils.newDaemonSingleThreadScheduledExecutor("Credential Renewal Thread") - val originalCreds = UserGroupInformation.getCurrentUser().getCredentials() - val ugi = doLogin() - - val tgtRenewalTask = new Runnable() { - override def run(): Unit = { - ugi.checkTGTAndReloginFromKeytab() + val ugi = UserGroupInformation.getCurrentUser() + if (ugi.isFromKeytab()) { + // In Hadoop 2.x, renewal of the keytab-based login seems to be automatic, but in Hadoop 3.x, + // it is configurable (see hadoop.kerberos.keytab.login.autorenewal.enabled, added in + // HADOOP-9567). This task will make sure that the user stays logged in regardless of that + // configuration's value. Note that checkTGTAndReloginFromKeytab() is a no-op if the TGT does + // not need to be renewed yet. + val tgtRenewalTask = new Runnable() { + override def run(): Unit = { + ugi.checkTGTAndReloginFromKeytab() + } } + val tgtRenewalPeriod = sparkConf.get(KERBEROS_RELOGIN_PERIOD) + renewalExecutor.scheduleAtFixedRate(tgtRenewalTask, tgtRenewalPeriod, tgtRenewalPeriod, + TimeUnit.SECONDS) } - val tgtRenewalPeriod = sparkConf.get(KERBEROS_RELOGIN_PERIOD) - renewalExecutor.scheduleAtFixedRate(tgtRenewalTask, tgtRenewalPeriod, tgtRenewalPeriod, - TimeUnit.SECONDS) - val creds = obtainTokensAndScheduleRenewal(ugi) - ugi.addCredentials(creds) - - val driver = driverRef.get() - if (driver != null) { - val tokens = SparkHadoopUtil.get.serialize(creds) - driver.send(UpdateDelegationTokens(tokens)) - } - - // Transfer the original user's tokens to the new user, since it may contain needed tokens - // (such as those user to connect to YARN). Explicitly avoid overwriting tokens that already - // exist in the current user's credentials, since those were freshly obtained above - // (see SPARK-23361). - val existing = ugi.getCredentials() - existing.mergeAll(originalCreds) - ugi.addCredentials(existing) - ugi + updateTokensTask() } def stop(): Unit = { @@ -218,27 +193,22 @@ private[spark] class HadoopDelegationTokenManager( * Periodic task to login to the KDC and create new delegation tokens. Re-schedules itself * to fetch the next set of tokens when needed. */ - private def updateTokensTask(): Unit = { + private def updateTokensTask(): Array[Byte] = { try { val freshUGI = doLogin() val creds = obtainTokensAndScheduleRenewal(freshUGI) val tokens = SparkHadoopUtil.get.serialize(creds) - val driver = driverRef.get() - if (driver != null) { - logInfo("Updating delegation tokens.") - driver.send(UpdateDelegationTokens(tokens)) - } else { - // This shouldn't really happen, since the driver should register way before tokens expire. - logWarning("Delegation tokens close to expiration but no driver has registered yet.") - SparkHadoopUtil.get.addDelegationTokens(tokens, sparkConf) - } + logInfo("Updating delegation tokens.") + schedulerRef.send(UpdateDelegationTokens(tokens)) + tokens } catch { case e: Exception => val delay = TimeUnit.SECONDS.toMillis(sparkConf.get(CREDENTIALS_RENEWAL_RETRY_WAIT)) logWarning(s"Failed to update tokens, will try again in ${UIUtils.formatDuration(delay)}!" 
+ " If this happens too often tasks will fail.", e) scheduleRenewal(delay) + null } } diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala index 90f705138157..4ca0136424fe 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HiveDelegationTokenProvider.scala @@ -67,11 +67,17 @@ private[spark] class HiveDelegationTokenProvider // Other modes (such as client with or without keytab, or cluster mode with keytab) do not need // a delegation token, since there's a valid kerberos TGT for the right user available to the // driver, which is the only process that connects to the HMS. - val deployMode = sparkConf.get("spark.submit.deployMode", "client") - UserGroupInformation.isSecurityEnabled && + // + // Note that this means Hive tokens are not re-created periodically by the token manager. + // This is because HMS connections are only performed by the Spark driver, and the driver + // either has a TGT, in which case it does not need tokens, or it has a token created + // elsewhere, in which case it cannot create new ones. The check for an existing token avoids + // printing an exception to the logs in the latter case. + val currentToken = UserGroupInformation.getCurrentUser().getCredentials().getToken(tokenAlias) + currentToken == null && UserGroupInformation.isSecurityEnabled && hiveConf(hadoopConf).getTrimmed("hive.metastore.uris", "").nonEmpty && (SparkHadoopUtil.get.isProxyUser(UserGroupInformation.getCurrentUser()) || - (deployMode == "cluster" && !sparkConf.contains(KEYTAB))) + (!Utils.isClientMode(sparkConf) && !sparkConf.contains(KEYTAB))) } override def obtainDelegationTokens( @@ -98,7 +104,7 @@ private[spark] class HiveDelegationTokenProvider val hive2Token = new Token[DelegationTokenIdentifier]() hive2Token.decodeFromUrlString(tokenStr) logDebug(s"Get Token from hive metastore: ${hive2Token.toString}") - creds.addToken(new Text("hive.server2.delegation.token"), hive2Token) + creds.addToken(tokenAlias, hive2Token) } None @@ -134,4 +140,6 @@ private[spark] class HiveDelegationTokenProvider case e: UndeclaredThrowableException => throw Option(e.getCause()).getOrElse(e) } } + + private def tokenAlias: Text = new Text("hive.server2.delegation.token") } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index e8b7fc0ef100..9e768c22c17e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -104,6 +104,9 @@ private[spark] object CoarseGrainedClusterMessages { case class RegisterClusterManager(am: RpcEndpointRef) extends CoarseGrainedClusterMessage + // Used by YARN's client mode AM to retrieve the current set of delegation tokens. 
+ object RetrieveDelegationTokens extends CoarseGrainedClusterMessage + // Request executors by specifying the new total number of executors desired // This includes executors already pending or running case class RequestExecutors( diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 329158a44d36..98ed2fffc0ac 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -162,11 +162,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp } case UpdateDelegationTokens(newDelegationTokens) => - SparkHadoopUtil.get.addDelegationTokens(newDelegationTokens, conf) - delegationTokens.set(newDelegationTokens) - executorDataMap.values.foreach { ed => - ed.executorEndpoint.send(UpdateDelegationTokens(newDelegationTokens)) - } + updateDelegationTokens(newDelegationTokens) case RemoveExecutor(executorId, reason) => // We will remove the executor's state and cannot restore it. However, the connection @@ -404,17 +400,18 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp driverEndpoint = createDriverEndpointRef(properties) if (UserGroupInformation.isSecurityEnabled()) { - delegationTokenManager = createTokenManager() + delegationTokenManager = createTokenManager(driverEndpoint) delegationTokenManager.foreach { dtm => - dtm.setDriverRef(driverEndpoint) - val creds = if (dtm.renewalEnabled) { - dtm.start().getCredentials() + val tokens = if (dtm.renewalEnabled) { + dtm.start() } else { val creds = UserGroupInformation.getCurrentUser().getCredentials() dtm.obtainDelegationTokens(creds) - creds + SparkHadoopUtil.get.serialize(creds) + } + if (tokens != null) { + delegationTokens.set(tokens) } - delegationTokens.set(SparkHadoopUtil.get.serialize(creds)) } } } @@ -716,8 +713,27 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * Create the delegation token manager to be used for the application. This method is called * once during the start of the scheduler backend (so after the object has already been * fully constructed), only if security is enabled in the Hadoop configuration. + * + * @param schedulerRef RPC endpoint for the scheduler, where updated delegation tokens should be + * sent. */ - protected def createTokenManager(): Option[HadoopDelegationTokenManager] = None + protected def createTokenManager( + schedulerRef: RpcEndpointRef): Option[HadoopDelegationTokenManager] = None + + /** + * Called when a new set of delegation tokens is sent to the driver. Child classes can override + * this method but should always call this implementation, which handles token distribution to + * executors. 
+ */ + protected def updateDelegationTokens(tokens: Array[Byte]): Unit = { + SparkHadoopUtil.get.addDelegationTokens(tokens, conf) + delegationTokens.set(tokens) + executorDataMap.values.foreach { ed => + ed.executorEndpoint.send(UpdateDelegationTokens(tokens)) + } + } + + protected def currentDelegationTokens: Array[Byte] = delegationTokens.get() } diff --git a/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala index def9e626a2df..af7d44b160fe 100644 --- a/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala @@ -27,7 +27,7 @@ class HadoopDelegationTokenManagerSuite extends SparkFunSuite { private val hadoopConf = new Configuration() test("default configuration") { - val manager = new HadoopDelegationTokenManager(new SparkConf(false), hadoopConf) + val manager = new HadoopDelegationTokenManager(new SparkConf(false), hadoopConf, null) assert(manager.isProviderLoaded("hadoopfs")) assert(manager.isProviderLoaded("hbase")) assert(manager.isProviderLoaded("hive")) @@ -36,7 +36,7 @@ class HadoopDelegationTokenManagerSuite extends SparkFunSuite { test("disable hive credential provider") { val sparkConf = new SparkConf(false).set("spark.security.credentials.hive.enabled", "false") - val manager = new HadoopDelegationTokenManager(sparkConf, hadoopConf) + val manager = new HadoopDelegationTokenManager(sparkConf, hadoopConf, null) assert(manager.isProviderLoaded("hadoopfs")) assert(manager.isProviderLoaded("hbase")) assert(!manager.isProviderLoaded("hive")) @@ -47,7 +47,7 @@ class HadoopDelegationTokenManagerSuite extends SparkFunSuite { val sparkConf = new SparkConf(false) .set("spark.yarn.security.tokens.hadoopfs.enabled", "false") .set("spark.yarn.security.credentials.hive.enabled", "false") - val manager = new HadoopDelegationTokenManager(sparkConf, hadoopConf) + val manager = new HadoopDelegationTokenManager(sparkConf, hadoopConf, null) assert(!manager.isProviderLoaded("hadoopfs")) assert(manager.isProviderLoaded("hbase")) assert(!manager.isProviderLoaded("hive")) @@ -99,7 +99,7 @@ private object NoHiveTest { def runTest(): Unit = { try { - val manager = new HadoopDelegationTokenManager(new SparkConf(), new Configuration()) + val manager = new HadoopDelegationTokenManager(new SparkConf(), new Configuration(), null) require(!manager.isProviderLoaded("hive")) } catch { case e: Throwable => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala index 721d7e97b21f..a77e8d4dbcff 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStep.scala @@ -91,7 +91,7 @@ private[spark] class KerberosConfDriverFeatureStep(kubernetesConf: KubernetesDri private lazy val delegationTokens: Array[Byte] = { if (keytab.isEmpty && existingSecretName.isEmpty) { val tokenManager = new HadoopDelegationTokenManager(kubernetesConf.sparkConf, - SparkHadoopUtil.get.newConfiguration(kubernetesConf.sparkConf)) + 
SparkHadoopUtil.get.newConfiguration(kubernetesConf.sparkConf), null) val creds = UserGroupInformation.getCurrentUser().getCredentials() tokenManager.obtainDelegationTokens(creds) // If no tokens and no secrets are stored in the credentials, make sure nothing is returned, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index cd298971e02a..e285e202a148 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -26,7 +26,7 @@ import org.apache.spark.SparkContext import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.security.HadoopDelegationTokenManager -import org.apache.spark.rpc.{RpcAddress, RpcEnv} +import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcEnv} import org.apache.spark.scheduler.{ExecutorLossReason, TaskSchedulerImpl} import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, SchedulerBackendUtils} import org.apache.spark.util.{ThreadUtils, Utils} @@ -147,8 +147,9 @@ private[spark] class KubernetesClusterSchedulerBackend( new KubernetesDriverEndpoint(sc.env.rpcEnv, properties) } - override protected def createTokenManager(): Option[HadoopDelegationTokenManager] = { - Some(new HadoopDelegationTokenManager(conf, sc.hadoopConfiguration)) + override protected def createTokenManager( + schedulerRef: RpcEndpointRef): Option[HadoopDelegationTokenManager] = { + Some(new HadoopDelegationTokenManager(conf, sc.hadoopConfiguration, schedulerRef)) } private class KubernetesDriverEndpoint(rpcEnv: RpcEnv, sparkProperties: Seq[(String, String)]) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index d0174516c236..03cd2583b9b2 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -36,7 +36,7 @@ import org.apache.spark.internal.config import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.mesos.MesosExternalShuffleClient -import org.apache.spark.rpc.RpcEndpointAddress +import org.apache.spark.rpc.{RpcEndpointAddress, RpcEndpointRef} import org.apache.spark.scheduler.{SlaveLost, TaskSchedulerImpl} import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils @@ -772,8 +772,9 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( } } - override protected def createTokenManager(): Option[HadoopDelegationTokenManager] = { - Some(new HadoopDelegationTokenManager(conf, sc.hadoopConfiguration)) + override protected def createTokenManager( + schedulerRef: RpcEndpointRef): Option[HadoopDelegationTokenManager] = { + Some(new HadoopDelegationTokenManager(conf, sc.hadoopConfiguration, schedulerRef)) } private def 
numExecutors(): Int = { diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 8dbdac168f70..1ece7bdc979c 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -30,6 +30,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.{StringUtils => ComStrUtils} import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.util.StringUtils import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ @@ -41,7 +42,6 @@ import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.history.HistoryServer import org.apache.spark.deploy.yarn.config._ -import org.apache.spark.deploy.yarn.security.YARNHadoopDelegationTokenManager import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.metrics.MetricsSystem @@ -58,6 +58,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends // TODO: Currently, task to container is computed once (TaskSetManager) - which need not be // optimal as more containers are available. Might need to handle this better. + private val appAttemptId = YarnSparkHadoopUtil.getContainerId.getApplicationAttemptId() private val isClusterMode = args.userClass != null private val sparkConf = new SparkConf() @@ -99,25 +100,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends } } - private val tokenManager: Option[YARNHadoopDelegationTokenManager] = { - sparkConf.get(KEYTAB).map { _ => - new YARNHadoopDelegationTokenManager(sparkConf, yarnConf) - } - } - - private val ugi = tokenManager match { - case Some(tm) => - // Set the context class loader so that the token renewer has access to jars distributed - // by the user. - Utils.withContextClassLoader(userClassLoader) { - tm.start() - } - - case _ => - SparkHadoopUtil.get.createSparkUser() - } - - private val client = doAsUser { new YarnRMClient() } + private val client = new YarnRMClient() // Default to twice the number of executors (twice the maximum number of executors if dynamic // allocation is enabled), with a minimum of 3. @@ -174,11 +157,19 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends // In cluster mode, used to tell the AM when the user's SparkContext has been initialized. private val sparkContextPromise = Promise[SparkContext]() - // Load the list of localized files set by the client. This is used when launching executors, - // and is loaded here so that these configs don't pollute the Web UI's environment page in - // cluster mode. - private val localResources = doAsUser { + /** + * Load the list of localized files set by the client, used when launching executors. This should + * be called in a context where the needed credentials to access HDFS are available. 
+ */ + private def prepareLocalResources(): Map[String, LocalResource] = { logInfo("Preparing Local resources") + val distCacheConf = new SparkConf(false) + if (args.distCacheConf != null) { + Utils.getPropertiesFromFile(args.distCacheConf).foreach { case (k, v) => + distCacheConf.set(k, v) + } + } + val resources = HashMap[String, LocalResource]() def setupDistributedCache( @@ -199,11 +190,11 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends resources(fileName) = amJarRsrc } - val distFiles = sparkConf.get(CACHED_FILES) - val fileSizes = sparkConf.get(CACHED_FILES_SIZES) - val timeStamps = sparkConf.get(CACHED_FILES_TIMESTAMPS) - val visibilities = sparkConf.get(CACHED_FILES_VISIBILITIES) - val resTypes = sparkConf.get(CACHED_FILES_TYPES) + val distFiles = distCacheConf.get(CACHED_FILES) + val fileSizes = distCacheConf.get(CACHED_FILES_SIZES) + val timeStamps = distCacheConf.get(CACHED_FILES_TIMESTAMPS) + val visibilities = distCacheConf.get(CACHED_FILES_VISIBILITIES) + val resTypes = distCacheConf.get(CACHED_FILES_TYPES) for (i <- 0 to distFiles.size - 1) { val resType = LocalResourceType.valueOf(resTypes(i)) @@ -212,7 +203,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends } // Distribute the conf archive to executors. - sparkConf.get(CACHED_CONF_ARCHIVE).foreach { path => + distCacheConf.get(CACHED_CONF_ARCHIVE).foreach { path => val uri = new URI(path) val fs = FileSystem.get(uri, yarnConf) val status = fs.getFileStatus(new Path(uri)) @@ -225,33 +216,12 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends LocalResourceVisibility.PRIVATE.name()) } - // Clean up the configuration so it doesn't show up in the Web UI (since it's really noisy). - CACHE_CONFIGS.foreach { e => - sparkConf.remove(e) - sys.props.remove(e.key) - } - resources.toMap } - def getAttemptId(): ApplicationAttemptId = { - client.getAttemptId() - } - final def run(): Int = { - doAsUser { - runImpl() - } - exitCode - } - - private def runImpl(): Unit = { try { - val appAttemptId = client.getAttemptId() - - var attemptID: Option[String] = None - - if (isClusterMode) { + val attemptID = if (isClusterMode) { // Set the web ui port to be ephemeral for yarn so we don't conflict with // other spark processes running on the same box System.setProperty("spark.ui.port", "0") @@ -264,7 +234,9 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends // configuration will be checked in SparkContext to avoid misuse of yarn cluster mode. System.setProperty("spark.yarn.app.id", appAttemptId.getApplicationId().toString()) - attemptID = Option(appAttemptId.getAttemptId.toString) + Option(appAttemptId.getAttemptId.toString) + } else { + None } new CallerContext( @@ -277,7 +249,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends val priority = ShutdownHookManager.SPARK_CONTEXT_SHUTDOWN_PRIORITY - 1 ShutdownHookManager.addShutdownHook(priority) { () => val maxAppAttempts = client.getMaxRegAttempts(sparkConf, yarnConf) - val isLastAttempt = client.getAttemptId().getAttemptId() >= maxAppAttempts + val isLastAttempt = appAttemptId.getAttemptId() >= maxAppAttempts if (!finished) { // The default state of ApplicationMaster is failed if it is invoked by shut down hook. 
@@ -322,6 +294,8 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends logWarning("Exception during stopping of the metric system: ", e) } } + + exitCode } /** @@ -377,9 +351,6 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends logDebug("shutting down user thread") userClassThread.interrupt() } - if (!inShutdown) { - tokenManager.foreach(_.stop()) - } } } } @@ -405,8 +376,8 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends port: Int, _sparkConf: SparkConf, uiAddress: Option[String]): Unit = { - val appId = client.getAttemptId().getApplicationId().toString() - val attemptId = client.getAttemptId().getAttemptId().toString() + val appId = appAttemptId.getApplicationId().toString() + val attemptId = appAttemptId.getAttemptId().toString() val historyAddress = ApplicationMaster .getHistoryServerAddress(_sparkConf, yarnConf, appId, attemptId) @@ -415,9 +386,20 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends } private def createAllocator(driverRef: RpcEndpointRef, _sparkConf: SparkConf): Unit = { - val appId = client.getAttemptId().getApplicationId().toString() + // In client mode, the AM may be restarting after delegation tokens have reached their TTL. So + // always contact the driver to get the current set of valid tokens, so that local resources can + // be initialized below. + if (!isClusterMode) { + val tokens = driverRef.askSync[Array[Byte]](RetrieveDelegationTokens) + if (tokens != null) { + SparkHadoopUtil.get.addDelegationTokens(tokens, _sparkConf) + } + } + + val appId = appAttemptId.getApplicationId().toString() val driverUrl = RpcEndpointAddress(driverRef.address.host, driverRef.address.port, CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString + val localResources = prepareLocalResources() // Before we initialize the allocator, let's log the information about how executors will // be run up front, to avoid printing this out for every single executor being launched. @@ -433,13 +415,12 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends allocator = client.createAllocator( yarnConf, _sparkConf, + appAttemptId, driverUrl, driverRef, securityMgr, localResources) - tokenManager.foreach(_.setDriverRef(driverRef)) - // Initialize the AM endpoint *after* the allocator has been initialized. This ensures // that when the driver sends an initial executor request (e.g. after an AM restart), // the allocator is ready to service requests. 
@@ -755,6 +736,9 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends case None => logWarning("Container allocator is not ready to find executor loss reasons yet.") } + + case UpdateDelegationTokens(tokens) => + SparkHadoopUtil.get.addDelegationTokens(tokens, sparkConf) } override def onDisconnected(remoteAddress: RpcAddress): Unit = { @@ -767,12 +751,6 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends } } - private def doAsUser[T](fn: => T): T = { - ugi.doAs(new PrivilegedExceptionAction[T]() { - override def run: T = fn - }) - } - } object ApplicationMaster extends Logging { @@ -793,7 +771,24 @@ object ApplicationMaster extends Logging { SignalUtils.registerLogger(log) val amArgs = new ApplicationMasterArguments(args) master = new ApplicationMaster(amArgs) - System.exit(master.run()) + + val ugi = master.sparkConf.get(PRINCIPAL) match { + case Some(principal) => + val originalCreds = UserGroupInformation.getCurrentUser().getCredentials() + SparkHadoopUtil.get.loginUserFromKeytab(principal, master.sparkConf.get(KEYTAB).orNull) + val newUGI = UserGroupInformation.getCurrentUser() + // Transfer the original user's tokens to the new user, since it may contain needed tokens + // (such as those user to connect to YARN). + newUGI.addCredentials(originalCreds) + newUGI + + case _ => + SparkHadoopUtil.get.createSparkUser() + } + + ugi.doAs(new PrivilegedExceptionAction[Unit]() { + override def run(): Unit = System.exit(master.run()) + }) } private[spark] def sparkContextInitialized(sc: SparkContext): Unit = { @@ -801,7 +796,7 @@ object ApplicationMaster extends Logging { } private[spark] def getAttemptId(): ApplicationAttemptId = { - master.getAttemptId + master.appAttemptId } private[spark] def getHistoryServerAddress( diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala index cc76a7c8f13f..c10206c84727 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala @@ -26,6 +26,7 @@ class ApplicationMasterArguments(val args: Array[String]) { var primaryRFile: String = null var userArgs: Seq[String] = Nil var propertiesFile: String = null + var distCacheConf: String = null parseArgs(args.toList) @@ -62,6 +63,10 @@ class ApplicationMasterArguments(val args: Array[String]) { propertiesFile = value args = tail + case ("--dist-cache-conf") :: value :: tail => + distCacheConf = value + args = tail + case _ => printUsageAndExit(1, args) } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 44a60b835f12..9f09dc031754 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -100,21 +100,19 @@ private[spark] class Client( } private val distCacheMgr = new ClientDistributedCacheManager() + private val cachedResourcesConf = new SparkConf(false) - private val principal = sparkConf.get(PRINCIPAL).orNull private val keytab = sparkConf.get(KEYTAB).orNull - private val loginFromKeytab = principal != null - private val amKeytabFileName: String = { + private val amKeytabFileName: 
Option[String] = if (keytab != null && isClusterMode) { + val principal = sparkConf.get(PRINCIPAL).orNull require((principal == null) == (keytab == null), "Both principal and keytab must be defined, or neither.") - if (loginFromKeytab) { - logInfo(s"Kerberos credentials: principal = $principal, keytab = $keytab") - // Generate a file name that can be used for the keytab file, that does not conflict - // with any user file. - new File(keytab).getName() + "-" + UUID.randomUUID().toString - } else { - null - } + logInfo(s"Kerberos credentials: principal = $principal, keytab = $keytab") + // Generate a file name that can be used for the keytab file, that does not conflict + // with any user file. + Some(new File(keytab).getName() + "-" + UUID.randomUUID().toString) + } else { + None } require(keytab == null || !Utils.isLocalUri(keytab), "Keytab should reference a local file.") @@ -220,16 +218,7 @@ private[spark] class Client( } } - if (isClusterMode && principal != null && keytab != null) { - val newUgi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab) - newUgi.doAs(new PrivilegedExceptionAction[Unit] { - override def run(): Unit = { - cleanupStagingDirInternal() - } - }) - } else { - cleanupStagingDirInternal() - } + cleanupStagingDirInternal() } /** @@ -312,7 +301,7 @@ private[spark] class Client( */ private def setupSecurityToken(amContainer: ContainerLaunchContext): Unit = { val credentials = UserGroupInformation.getCurrentUser().getCredentials() - val credentialManager = new YARNHadoopDelegationTokenManager(sparkConf, hadoopConf) + val credentialManager = new YARNHadoopDelegationTokenManager(sparkConf, hadoopConf, null) credentialManager.obtainDelegationTokens(credentials) // When using a proxy user, copy the delegation tokens to the user's credentials. Avoid @@ -496,11 +485,11 @@ private[spark] class Client( // If we passed in a keytab, make sure we copy the keytab to the staging directory on // HDFS, and setup the relevant environment vars, so the AM can login again. - if (loginFromKeytab) { + amKeytabFileName.foreach { kt => logInfo("To enable the AM to login from keytab, credentials are being copied over to the AM" + " via the YARN Secure Distributed Cache.") val (_, localizedPath) = distribute(keytab, - destName = Some(amKeytabFileName), + destName = Some(kt), appMasterOnly = true) require(localizedPath != null, "Keytab file already distributed.") } @@ -636,7 +625,7 @@ private[spark] class Client( // Update the configuration with all the distributed files, minus the conf archive. The // conf archive will be handled by the AM differently so that we avoid having to send // this configuration by other means. See SPARK-14602 for one reason of why this is needed. - distCacheMgr.updateConfiguration(sparkConf) + distCacheMgr.updateConfiguration(cachedResourcesConf) // Upload the conf archive to HDFS manually, and record its location in the configuration. // This will allow the AM to know where the conf archive is in HDFS, so that it can be @@ -648,7 +637,7 @@ private[spark] class Client( // system. 
val remoteConfArchivePath = new Path(destDir, LOCALIZED_CONF_ARCHIVE) val remoteFs = FileSystem.get(remoteConfArchivePath.toUri(), hadoopConf) - sparkConf.set(CACHED_CONF_ARCHIVE, remoteConfArchivePath.toString()) + cachedResourcesConf.set(CACHED_CONF_ARCHIVE, remoteConfArchivePath.toString()) val localConfArchive = new Path(createConfArchive().toURI()) copyFileToRemote(destDir, localConfArchive, replication, symlinkCache, force = true, @@ -660,11 +649,6 @@ private[spark] class Client( remoteFs, hadoopConf, remoteConfArchivePath, localResources, LocalResourceType.ARCHIVE, LOCALIZED_CONF_DIR, statCache, appMasterOnly = false) - // Clear the cache-related entries from the configuration to avoid them polluting the - // UI's environment page. This works for client mode; for cluster mode, this is handled - // by the AM. - CACHE_CONFIGS.foreach(sparkConf.remove) - localResources } @@ -768,19 +752,25 @@ private[spark] class Client( hadoopConf.writeXml(confStream) confStream.closeEntry() - // Save Spark configuration to a file in the archive, but filter out the app's secret. - val props = new Properties() - sparkConf.getAll.foreach { case (k, v) => - props.setProperty(k, v) + // Save Spark configuration to a file in the archive. + val props = confToProperties(sparkConf) + + // If propagating the keytab to the AM, override the keytab name with the name of the + // distributed file. Otherwise remove princpal/keytab from the conf, so they're not seen + // by the AM at all. + amKeytabFileName match { + case Some(kt) => + props.setProperty(KEYTAB.key, kt) + case None => + props.remove(PRINCIPAL.key) + props.remove(KEYTAB.key) } - // Override spark.yarn.key to point to the location in distributed cache which will be used - // by AM. - Option(amKeytabFileName).foreach { k => props.setProperty(KEYTAB.key, k) } - confStream.putNextEntry(new ZipEntry(SPARK_CONF_FILE)) - val writer = new OutputStreamWriter(confStream, StandardCharsets.UTF_8) - props.store(writer, "Spark configuration.") - writer.flush() - confStream.closeEntry() + + writePropertiesToArchive(props, SPARK_CONF_FILE, confStream) + + // Write the distributed cache config to the archive. + writePropertiesToArchive(confToProperties(cachedResourcesConf), DIST_CACHE_CONF_FILE, + confStream) } finally { confStream.close() } @@ -984,7 +974,10 @@ private[spark] class Client( } val amArgs = Seq(amClass) ++ userClass ++ userJar ++ primaryPyFile ++ primaryRFile ++ userArgs ++ - Seq("--properties-file", buildPath(Environment.PWD.$$(), LOCALIZED_CONF_DIR, SPARK_CONF_FILE)) + Seq("--properties-file", + buildPath(Environment.PWD.$$(), LOCALIZED_CONF_DIR, SPARK_CONF_FILE)) ++ + Seq("--dist-cache-conf", + buildPath(Environment.PWD.$$(), LOCALIZED_CONF_DIR, DIST_CACHE_CONF_FILE)) // Command for the ApplicationMaster val commands = prefixEnv ++ @@ -1213,6 +1206,9 @@ private object Client extends Logging { // Name of the file in the conf archive containing Spark configuration. val SPARK_CONF_FILE = "__spark_conf__.properties" + // Name of the file in the conf archive containing the distributed cache info. + val DIST_CACHE_CONF_FILE = "__spark_dist_cache__.properties" + // Subdirectory where the user's python files (not archives) will be placed. 
val LOCALIZED_PYTHON_DIR = "__pyfiles__" @@ -1512,6 +1508,22 @@ private object Client extends Logging { } getClusterPath(conf, cmdPrefix) } + + def confToProperties(conf: SparkConf): Properties = { + val props = new Properties() + conf.getAll.foreach { case (k, v) => + props.setProperty(k, v) + } + props + } + + def writePropertiesToArchive(props: Properties, name: String, out: ZipOutputStream): Unit = { + out.putNextEntry(new ZipEntry(name)) + val writer = new OutputStreamWriter(out, StandardCharsets.UTF_8) + props.store(writer, "Spark configuration.") + writer.flush() + out.closeEntry() + } } private[spark] class YarnClusterApplication extends SparkApplication { diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala index 05a7b1e1310c..cf16edf16c03 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala @@ -76,12 +76,13 @@ private[spark] class YarnRMClient extends Logging { def createAllocator( conf: YarnConfiguration, sparkConf: SparkConf, + appAttemptId: ApplicationAttemptId, driverUrl: String, driverRef: RpcEndpointRef, securityMgr: SecurityManager, localResources: Map[String, LocalResource]): YarnAllocator = { require(registered, "Must register AM before creating allocator.") - new YarnAllocator(driverUrl, driverRef, conf, sparkConf, amClient, getAttemptId(), securityMgr, + new YarnAllocator(driverUrl, driverRef, conf, sparkConf, amClient, appAttemptId, securityMgr, localResources, new SparkRackResolver()) } @@ -100,11 +101,6 @@ private[spark] class YarnRMClient extends Logging { } } - /** Returns the attempt ID. */ - def getAttemptId(): ApplicationAttemptId = { - YarnSparkHadoopUtil.getContainerId.getApplicationAttemptId() - } - /** Returns the configuration for the AmIpFilter to add to the Spark UI. */ def getAmIpFilterParams(conf: YarnConfiguration, proxyBase: String): Map[String, String] = { // Figure out which scheme Yarn is using. Note the method seems to have been added after 2.2, diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index 7e9cd409daf3..6091cd496c03 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -321,16 +321,6 @@ package object config { .stringConf .createOptional - // The list of cache-related config entries. This is used by Client and the AM to clean - // up the environment so that these settings do not appear on the web UI. - private[yarn] val CACHE_CONFIGS = Seq( - CACHED_FILES, - CACHED_FILES_SIZES, - CACHED_FILES_TIMESTAMPS, - CACHED_FILES_VISIBILITIES, - CACHED_FILES_TYPES, - CACHED_CONF_ARCHIVE) - /* YARN allocator-level blacklisting related config entries. 
*/ private[spark] val YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED = ConfigBuilder("spark.yarn.blacklist.executor.launch.blacklisting.enabled") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManager.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManager.scala index 2d9a3f0c83fd..bb40ea801519 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManager.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManager.scala @@ -36,10 +36,11 @@ import org.apache.spark.util.Utils * [[ServiceCredentialProvider]] interface, as well as the builtin providers defined * in [[HadoopDelegationTokenManager]]. */ -private[yarn] class YARNHadoopDelegationTokenManager( +private[spark] class YARNHadoopDelegationTokenManager( _sparkConf: SparkConf, - _hadoopConf: Configuration) - extends HadoopDelegationTokenManager(_sparkConf, _hadoopConf) { + _hadoopConf: Configuration, + _schedulerRef: RpcEndpointRef) + extends HadoopDelegationTokenManager(_sparkConf, _hadoopConf, _schedulerRef) { private val credentialProviders = { ServiceLoader.load(classOf[ServiceCredentialProvider], Utils.getContextOrSparkClassLoader) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 167eef19ed85..934fba3e6ff3 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -27,6 +27,7 @@ import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.{config, Logging} import org.apache.spark.launcher.SparkAppHandle import org.apache.spark.scheduler.TaskSchedulerImpl +import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ private[spark] class YarnClientSchedulerBackend( scheduler: TaskSchedulerImpl, @@ -166,4 +167,9 @@ private[spark] class YarnClientSchedulerBackend( logInfo("Stopped") } + override protected def updateDelegationTokens(tokens: Array[Byte]): Unit = { + super.updateDelegationTokens(tokens) + amEndpoint.foreach(_.send(UpdateDelegationTokens(tokens))) + } + } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala index 1289d4be79ea..6357d4adbcd9 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -27,6 +27,8 @@ import scala.util.control.NonFatal import org.apache.hadoop.yarn.api.records.{ApplicationAttemptId, ApplicationId} import org.apache.spark.SparkContext +import org.apache.spark.deploy.security.HadoopDelegationTokenManager +import org.apache.spark.deploy.yarn.security.YARNHadoopDelegationTokenManager import org.apache.spark.internal.Logging import org.apache.spark.rpc._ import org.apache.spark.scheduler._ @@ -55,6 +57,7 @@ private[spark] abstract class YarnSchedulerBackend( protected var totalExpectedExecutors = 0 private val yarnSchedulerEndpoint = new 
YarnSchedulerEndpoint(rpcEnv) + protected var amEndpoint: Option[RpcEndpointRef] = None private val yarnSchedulerEndpointRef = rpcEnv.setupEndpoint( YarnSchedulerBackend.ENDPOINT_NAME, yarnSchedulerEndpoint) @@ -191,6 +194,11 @@ private[spark] abstract class YarnSchedulerBackend( sc.executorAllocationManager.foreach(_.reset()) } + override protected def createTokenManager( + schedulerRef: RpcEndpointRef): Option[HadoopDelegationTokenManager] = { + Some(new YARNHadoopDelegationTokenManager(sc.conf, sc.hadoopConfiguration, schedulerRef)) + } + /** * Override the DriverEndpoint to add extra logic for the case when an executor is disconnected. * This endpoint communicates with the executors and queries the AM for an executor's exit @@ -226,7 +234,6 @@ private[spark] abstract class YarnSchedulerBackend( */ private class YarnSchedulerEndpoint(override val rpcEnv: RpcEnv) extends ThreadSafeRpcEndpoint with Logging { - private var amEndpoint: Option[RpcEndpointRef] = None private[YarnSchedulerBackend] def handleExecutorDisconnectedFromDriver( executorId: String, @@ -266,11 +273,6 @@ private[spark] abstract class YarnSchedulerBackend( logWarning(s"Requesting driver to remove executor $executorId for reason $reason") driverEndpoint.send(r) } - - case u @ UpdateDelegationTokens(tokens) => - // Add the tokens to the current user and send a message to the scheduler so that it - // notifies all registered executors of the new tokens. - driverEndpoint.send(u) } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { @@ -304,6 +306,9 @@ private[spark] abstract class YarnSchedulerBackend( case RetrieveLastAllocatedExecutorId => context.reply(currentExecutorIdCounter) + + case RetrieveDelegationTokens => + context.reply(currentDelegationTokens) } override def onDisconnected(remoteAddress: RpcAddress): Unit = { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManagerSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManagerSuite.scala index 98315e423574..f00453cb9c59 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManagerSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManagerSuite.scala @@ -34,7 +34,7 @@ class YARNHadoopDelegationTokenManagerSuite extends SparkFunSuite { } test("Correctly loads credential providers") { - credentialManager = new YARNHadoopDelegationTokenManager(sparkConf, hadoopConf) + credentialManager = new YARNHadoopDelegationTokenManager(sparkConf, hadoopConf, null) assert(credentialManager.isProviderLoaded("yarn-test")) } } From 98be8953c75c026c1cb432cc8f66dd312feed0c6 Mon Sep 17 00:00:00 2001 From: maryannxue Date: Mon, 7 Jan 2019 13:59:40 -0800 Subject: [PATCH 0247/1072] [SPARK-26065][SQL] Change query hint from a `LogicalPlan` to a field ## What changes were proposed in this pull request? The existing query hint implementation relies on a logical plan node `ResolvedHint` to store query hints in logical plans, and on `Statistics` in physical plans. Since `ResolvedHint` is not really a logical operator and can break the pattern matching for existing and future optimization rules, it is a issue to the Optimizer as the old `AnalysisBarrier` was to the Analyzer. 
Given that all our query hints are either 1) a join hint, i.e., the broadcast hint, or 2) a re-partition hint, which is genuinely an operator, we only need to add a hint field on the `Join` plan, and that is a good enough solution for current hint usage.

This PR lets the `Join` node carry one hint for its left sub-tree and another for its right sub-tree, where each hint is the merged result of all the effective hints specified in the corresponding sub-tree. The "effectiveness" of a hint, i.e., whether it should be propagated to the `Join` node, remains consistent with the hint propagation rules originally implemented in the `Statistics` approach. Note that the `ResolvedHint` node still has to live through the analysis stage because of the `Dataset` interface, but it is removed, and its hint information moved to the `Join` node, in the "pre-optimization" stage.

This PR also changes how hints interact with join reordering. Before this PR, a hint would stop join reordering: for example, in "a.join(b).join(c).hint("broadcast").join(d)", the broadcast hint would stop d from participating in the cost-based join reordering, while still allowing reordering underneath the hint node. After this PR, the broadcast hint does not interfere with join reordering at all; after reordering, if a relation associated with a hint stays unchanged or equivalent to the original relation, the hint is retained, otherwise it is discarded. For example, if the original plan is "a.join(b).hint("broadcast").join(c).hint("broadcast").join(d)", the join order is "a JOIN b JOIN c JOIN d". If after reordering the join order becomes "a JOIN b JOIN (c JOIN d)", the plan becomes "a.join(b).hint("broadcast").join(c.join(d))"; but if the join order becomes "a JOIN c JOIN b JOIN d", the plan becomes "a.join(c).join(b).hint("broadcast").join(d)".

## How was this patch tested?

Added new tests.

Closes #23036 from maryannxue/query-hint.
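As a minimal illustration of the new shape (an editor's sketch, not part of the patch itself), the snippet below builds a `Join` that carries a broadcast hint in its new `hint` field and pattern-matches on it with a wildcard. It assumes the `spark-catalyst` module from this change and its test DSL (symbol-to-attribute implicits, `LocalRelation`) are on the classpath:

```scala
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.plans.Inner
import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, Join, JoinHint, LocalRelation}

object JoinHintSketch {
  def main(args: Array[String]): Unit = {
    // Two small relations, built with the Catalyst test DSL.
    val t1 = LocalRelation('a.int, 'b.int)
    val t2 = LocalRelation('c.int, 'd.int)

    // Before this patch a broadcast hint was a wrapper node:
    //   Join(ResolvedHint(t1, HintInfo(broadcast = true)), t2, Inner, None)
    // After this patch the hint is a field of Join itself; JoinHint.NONE
    // marks the unhinted case.
    val hinted = Join(t1, t2, Inner, None,
      JoinHint(leftHint = Some(HintInfo(broadcast = true)), rightHint = None))

    // Rules that do not care about hints keep matching on the first four
    // fields and ignore the fifth with a wildcard, so pattern matching no
    // longer breaks on an interposed ResolvedHint node.
    hinted match {
      case Join(left, right, _, _, hint) =>
        println(s"joining ${left.output} with ${right.output}, hint = $hint")
    }
  }
}
```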
Authored-by: maryannxue Signed-off-by: gatorsmile --- .../sql/catalyst/analysis/Analyzer.scala | 16 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 +- .../analysis/StreamingJoinHelper.scala | 2 +- .../UnsupportedOperationChecker.scala | 2 +- .../spark/sql/catalyst/dsl/package.scala | 2 +- .../optimizer/CostBasedJoinReorder.scala | 84 ++++++-- .../optimizer/EliminateResolvedHint.scala | 59 ++++++ .../sql/catalyst/optimizer/Optimizer.scala | 36 ++-- .../optimizer/PropagateEmptyRelation.scala | 2 +- .../ReplaceNullWithFalseInPredicate.scala | 2 +- .../sql/catalyst/optimizer/expressions.scala | 3 +- .../spark/sql/catalyst/optimizer/joins.scala | 27 ++- .../sql/catalyst/optimizer/subquery.scala | 14 +- .../sql/catalyst/parser/AstBuilder.scala | 4 +- .../sql/catalyst/planning/patterns.scala | 41 ++-- .../plans/logical/LogicalPlanVisitor.scala | 3 - .../catalyst/plans/logical/Statistics.scala | 7 +- .../plans/logical/basicLogicalOperators.scala | 14 +- .../sql/catalyst/plans/logical/hints.scala | 27 ++- .../statsEstimation/AggregateEstimation.scala | 3 +- .../BasicStatsPlanVisitor.scala | 2 - .../statsEstimation/JoinEstimation.scala | 2 +- .../SizeInBytesOnlyStatsPlanVisitor.scala | 22 +- .../analysis/AnalysisErrorSuite.scala | 8 +- .../sql/catalyst/analysis/AnalysisSuite.scala | 4 +- .../catalyst/analysis/ResolveHintsSuite.scala | 2 +- .../optimizer/ColumnPruningSuite.scala | 6 +- .../optimizer/FilterPushdownSuite.scala | 14 -- .../optimizer/JoinOptimizationSuite.scala | 28 +-- .../catalyst/optimizer/JoinReorderSuite.scala | 83 +++++++- .../optimizer/ReplaceOperatorSuite.scala | 8 +- .../spark/sql/catalyst/plans/PlanTest.scala | 10 +- .../sql/catalyst/plans/SameResultSuite.scala | 16 +- .../BasicStatsEstimationSuite.scala | 18 -- .../FilterEstimationSuite.scala | 2 +- .../statsEstimation/JoinEstimationSuite.scala | 31 +-- .../scala/org/apache/spark/sql/Dataset.scala | 16 +- .../spark/sql/execution/SparkStrategies.scala | 52 ++--- .../execution/columnar/InMemoryRelation.scala | 9 +- .../apache/spark/sql/CachedTableSuite.scala | 21 ++ .../apache/spark/sql/DataFrameJoinSuite.scala | 10 +- .../org/apache/spark/sql/JoinHintSuite.scala | 193 ++++++++++++++++++ .../spark/sql/StatisticsCollectionSuite.scala | 3 +- .../execution/joins/BroadcastJoinSuite.scala | 14 +- .../execution/joins/ExistenceJoinSuite.scala | 11 +- .../sql/execution/joins/InnerJoinSuite.scala | 15 +- .../sql/execution/joins/OuterJoinSuite.scala | 11 +- 47 files changed, 680 insertions(+), 283 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateResolvedHint.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 198645d875c4..2aa0f2117364 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -943,7 +943,7 @@ class Analyzer( failAnalysis("Invalid usage of '*' in explode/json_tuple/UDTF") // To resolve duplicate expression IDs for Join and Intersect - case j @ Join(left, right, _, _) if !j.duplicateResolved => + case j @ Join(left, right, _, _, _) if !j.duplicateResolved => j.copy(right = dedupRight(left, right)) case i @ Intersect(left, right, _) if !i.duplicateResolved => i.copy(right = dedupRight(left, right)) @@ -2249,13 
+2249,14 @@ class Analyzer( */ object ResolveNaturalAndUsingJoin extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { - case j @ Join(left, right, UsingJoin(joinType, usingCols), _) + case j @ Join(left, right, UsingJoin(joinType, usingCols), _, hint) if left.resolved && right.resolved && j.duplicateResolved => - commonNaturalJoinProcessing(left, right, joinType, usingCols, None) - case j @ Join(left, right, NaturalJoin(joinType), condition) if j.resolvedExceptNatural => + commonNaturalJoinProcessing(left, right, joinType, usingCols, None, hint) + case j @ Join(left, right, NaturalJoin(joinType), condition, hint) + if j.resolvedExceptNatural => // find common column names from both sides val joinNames = left.output.map(_.name).intersect(right.output.map(_.name)) - commonNaturalJoinProcessing(left, right, joinType, joinNames, condition) + commonNaturalJoinProcessing(left, right, joinType, joinNames, condition, hint) } } @@ -2360,7 +2361,8 @@ class Analyzer( right: LogicalPlan, joinType: JoinType, joinNames: Seq[String], - condition: Option[Expression]) = { + condition: Option[Expression], + hint: JoinHint) = { val leftKeys = joinNames.map { keyName => left.output.find(attr => resolver(attr.name, keyName)).getOrElse { throw new AnalysisException(s"USING column `$keyName` cannot be resolved on the left " + @@ -2401,7 +2403,7 @@ class Analyzer( sys.error("Unsupported natural join type " + joinType) } // use Project to trim unnecessary fields - Project(projectList, Join(left, right, joinType, newCondition)) + Project(projectList, Join(left, right, joinType, newCondition, hint)) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c28a97839fe4..18c40b370cb5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -172,7 +172,7 @@ trait CheckAnalysis extends PredicateHelper { failAnalysis("Null-aware predicate sub-queries cannot be used in nested " + s"conditions: $condition") - case j @ Join(_, _, _, Some(condition)) if condition.dataType != BooleanType => + case j @ Join(_, _, _, Some(condition), _) if condition.dataType != BooleanType => failAnalysis( s"join condition '${condition.sql}' " + s"of type ${condition.dataType.catalogString} is not a boolean.") @@ -609,7 +609,7 @@ trait CheckAnalysis extends PredicateHelper { failOnNonEqualCorrelatedPredicate(foundNonEqualCorrelatedPred, a) // Join can host correlated expressions. - case j @ Join(left, right, joinType, _) => + case j @ Join(left, right, joinType, _, _) => joinType match { // Inner join, like Filter, can be anywhere. 
case _: InnerLike => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala index 7a0aa08289ef..76733dd6dac3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala @@ -41,7 +41,7 @@ object StreamingJoinHelper extends PredicateHelper with Logging { */ def isWatermarkInJoinKeys(plan: LogicalPlan): Boolean = { plan match { - case ExtractEquiJoinKeys(_, leftKeys, rightKeys, _, _, _) => + case ExtractEquiJoinKeys(_, leftKeys, rightKeys, _, _, _, _) => (leftKeys ++ rightKeys).exists { case a: AttributeReference => a.metadata.contains(EventTimeWatermark.delayKey) case _ => false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index cff4cee09427..41ba6d34b549 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -229,7 +229,7 @@ object UnsupportedOperationChecker { throwError("dropDuplicates is not supported after aggregation on a " + "streaming DataFrame/Dataset") - case Join(left, right, joinType, condition) => + case Join(left, right, joinType, condition, _) => joinType match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 151481c80ee9..846ee3b38652 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -325,7 +325,7 @@ package object dsl { otherPlan: LogicalPlan, joinType: JoinType = Inner, condition: Option[Expression] = None): LogicalPlan = - Join(logicalPlan, otherPlan, joinType, condition) + Join(logicalPlan, otherPlan, joinType, condition, JoinHint.NONE) def cogroup[Key: Encoder, Left: Encoder, Right: Encoder, Result: Encoder]( otherPlan: LogicalPlan, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index 01634a9d852c..743d3ce944fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -22,7 +22,7 @@ import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeSet, Expression, PredicateHelper} import org.apache.spark.sql.catalyst.plans.{Inner, InnerLike, JoinType} -import org.apache.spark.sql.catalyst.plans.logical.{BinaryNode, Join, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf @@ -31,6 +31,40 @@ import org.apache.spark.sql.internal.SQLConf * Cost-based join reorder. * We may have several join reorder algorithms in the future. This class is the entry of these * algorithms, and chooses which one to use. 
+ * + * Note that join strategy hints, e.g. the broadcast hint, do not interfere with the reordering. + * Such hints will be applied on the equivalent counterparts (i.e., join between the same relations + * regardless of the join order) of the original nodes after reordering. + * For example, the plan before reordering is like: + * + * Join + * / \ + * Hint1 t4 + * / + * Join + * / \ + * Join t3 + * / \ + * Hint2 t2 + * / + * t1 + * + * The original join order as illustrated above is "((t1 JOIN t2) JOIN t3) JOIN t4", and after + * reordering, the new join order is "((t1 JOIN t3) JOIN t2) JOIN t4", so the new plan will be like: + * + * Join + * / \ + * Hint1 t4 + * / + * Join + * / \ + * Join t2 + * / \ + * t1 t3 + * + * "Hint1" is applied on "(t1 JOIN t3) JOIN t2" as it is equivalent to the original hinted node, + * "(t1 JOIN t2) JOIN t3"; while "Hint2" has disappeared from the new plan since there is no + * equivalent node to "t1 JOIN t2". */ object CostBasedJoinReorder extends Rule[LogicalPlan] with PredicateHelper { @@ -40,24 +74,30 @@ object CostBasedJoinReorder extends Rule[LogicalPlan] with PredicateHelper { if (!conf.cboEnabled || !conf.joinReorderEnabled) { plan } else { + // Use a map to track the hints on the join items. + val hintMap = new mutable.HashMap[AttributeSet, HintInfo] val result = plan transformDown { // Start reordering with a joinable item, which is an InnerLike join with conditions. - case j @ Join(_, _, _: InnerLike, Some(cond)) => - reorder(j, j.output) - case p @ Project(projectList, Join(_, _, _: InnerLike, Some(cond))) + case j @ Join(_, _, _: InnerLike, Some(cond), _) => + reorder(j, j.output, hintMap) + case p @ Project(projectList, Join(_, _, _: InnerLike, Some(cond), _)) if projectList.forall(_.isInstanceOf[Attribute]) => - reorder(p, p.output) + reorder(p, p.output, hintMap) } - - // After reordering is finished, convert OrderedJoin back to Join - result transformDown { - case OrderedJoin(left, right, jt, cond) => Join(left, right, jt, cond) + // After reordering is finished, convert OrderedJoin back to Join. + result transform { + case OrderedJoin(left, right, jt, cond) => + val joinHint = JoinHint(hintMap.get(left.outputSet), hintMap.get(right.outputSet)) + Join(left, right, jt, cond, joinHint) } } } - private def reorder(plan: LogicalPlan, output: Seq[Attribute]): LogicalPlan = { - val (items, conditions) = extractInnerJoins(plan) + private def reorder( + plan: LogicalPlan, + output: Seq[Attribute], + hintMap: mutable.HashMap[AttributeSet, HintInfo]): LogicalPlan = { + val (items, conditions) = extractInnerJoins(plan, hintMap) val result = // Do reordering if the number of items is appropriate and join conditions exist. // We also need to check if costs of all items can be evaluated. @@ -75,27 +115,31 @@ object CostBasedJoinReorder extends Rule[LogicalPlan] with PredicateHelper { * Extracts items of consecutive inner joins and join conditions. * This method works for bushy trees and left/right deep trees. 
*/ - private def extractInnerJoins(plan: LogicalPlan): (Seq[LogicalPlan], Set[Expression]) = { + private def extractInnerJoins( + plan: LogicalPlan, + hintMap: mutable.HashMap[AttributeSet, HintInfo]): (Seq[LogicalPlan], Set[Expression]) = { plan match { - case Join(left, right, _: InnerLike, Some(cond)) => - val (leftPlans, leftConditions) = extractInnerJoins(left) - val (rightPlans, rightConditions) = extractInnerJoins(right) + case Join(left, right, _: InnerLike, Some(cond), hint) => + hint.leftHint.foreach(hintMap.put(left.outputSet, _)) + hint.rightHint.foreach(hintMap.put(right.outputSet, _)) + val (leftPlans, leftConditions) = extractInnerJoins(left, hintMap) + val (rightPlans, rightConditions) = extractInnerJoins(right, hintMap) (leftPlans ++ rightPlans, splitConjunctivePredicates(cond).toSet ++ leftConditions ++ rightConditions) - case Project(projectList, j @ Join(_, _, _: InnerLike, Some(cond))) + case Project(projectList, j @ Join(_, _, _: InnerLike, Some(cond), _)) if projectList.forall(_.isInstanceOf[Attribute]) => - extractInnerJoins(j) + extractInnerJoins(j, hintMap) case _ => (Seq(plan), Set()) } } private def replaceWithOrderedJoin(plan: LogicalPlan): LogicalPlan = plan match { - case j @ Join(left, right, jt: InnerLike, Some(cond)) => + case j @ Join(left, right, jt: InnerLike, Some(cond), _) => val replacedLeft = replaceWithOrderedJoin(left) val replacedRight = replaceWithOrderedJoin(right) OrderedJoin(replacedLeft, replacedRight, jt, Some(cond)) - case p @ Project(projectList, j @ Join(_, _, _: InnerLike, Some(cond))) => + case p @ Project(projectList, j @ Join(_, _, _: InnerLike, Some(cond), _)) => p.copy(child = replaceWithOrderedJoin(j)) case _ => plan @@ -295,7 +339,7 @@ object JoinReorderDP extends PredicateHelper with Logging { } else { (otherPlan, onePlan) } - val newJoin = Join(left, right, Inner, joinConds.reduceOption(And)) + val newJoin = Join(left, right, Inner, joinConds.reduceOption(And), JoinHint.NONE) val collectedJoinConds = joinConds ++ oneJoinPlan.joinConds ++ otherJoinPlan.joinConds val remainingConds = conditions -- collectedJoinConds val neededAttr = AttributeSet(remainingConds.flatMap(_.references)) ++ topOutput diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateResolvedHint.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateResolvedHint.scala new file mode 100644 index 000000000000..bbe4eee4b432 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateResolvedHint.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule + +/** + * Replaces [[ResolvedHint]] operators from the plan. Move the [[HintInfo]] to associated [[Join]] + * operators, otherwise remove it if no [[Join]] operator is matched. + */ +object EliminateResolvedHint extends Rule[LogicalPlan] { + // This is also called in the beginning of the optimization phase, and as a result + // is using transformUp rather than resolveOperators. + def apply(plan: LogicalPlan): LogicalPlan = { + val pulledUp = plan transformUp { + case j: Join => + val leftHint = mergeHints(collectHints(j.left)) + val rightHint = mergeHints(collectHints(j.right)) + j.copy(hint = JoinHint(leftHint, rightHint)) + } + pulledUp.transform { + case h: ResolvedHint => h.child + } + } + + private def mergeHints(hints: Seq[HintInfo]): Option[HintInfo] = { + hints.reduceOption((h1, h2) => HintInfo( + broadcast = h1.broadcast || h2.broadcast)) + } + + private def collectHints(plan: LogicalPlan): Seq[HintInfo] = { + plan match { + case h: ResolvedHint => collectHints(h.child) :+ h.hints + case u: UnaryNode => collectHints(u.child) + // TODO revisit this logic: + // except and intersect are semi/anti-joins which won't return more data then + // their left argument, so the broadcast hint should be propagated here + case i: Intersect => collectHints(i.left) + case e: Except => collectHints(e.left) + case _ => Seq.empty + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 44d554311490..06f908281dd3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -115,6 +115,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) // However, because we also use the analyzer to canonicalized queries (for view definition), // we do not eliminate subqueries or compute current time in the analyzer. Batch("Finish Analysis", Once, + EliminateResolvedHint, EliminateSubqueryAliases, EliminateView, ReplaceExpressions, @@ -192,6 +193,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) */ def nonExcludableRules: Seq[String] = EliminateDistinct.ruleName :: + EliminateResolvedHint.ruleName :: EliminateSubqueryAliases.ruleName :: EliminateView.ruleName :: ReplaceExpressions.ruleName :: @@ -356,7 +358,7 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { // not allowed to use the same attributes. We use a blacklist to prevent us from creating a // situation in which this happens; the rule will only remove an alias if its child // attribute is not on the black list. - case Join(left, right, joinType, condition) => + case Join(left, right, joinType, condition, hint) => val newLeft = removeRedundantAliases(left, blacklist ++ right.outputSet) val newRight = removeRedundantAliases(right, blacklist ++ newLeft.outputSet) val mapping = AttributeMap( @@ -365,7 +367,7 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { val newCondition = condition.map(_.transform { case a: Attribute => mapping.getOrElse(a, a) }) - Join(newLeft, newRight, joinType, newCondition) + Join(newLeft, newRight, joinType, newCondition, hint) case _ => // Remove redundant aliases in the subtree(s). 
@@ -460,7 +462,7 @@ object LimitPushDown extends Rule[LogicalPlan] { // on both sides if it is applied multiple times. Therefore: // - If one side is already limited, stack another limit on top if the new limit is smaller. // The redundant limit will be collapsed by the CombineLimits rule. - case LocalLimit(exp, join @ Join(left, right, joinType, _)) => + case LocalLimit(exp, join @ Join(left, right, joinType, _, _)) => val newJoin = joinType match { case RightOuter => join.copy(right = maybePushLocalLimit(exp, right)) case LeftOuter => join.copy(left = maybePushLocalLimit(exp, left)) @@ -578,7 +580,7 @@ object ColumnPruning extends Rule[LogicalPlan] { p.copy(child = g.copy(child = newChild, unrequiredChildIndex = unrequiredIndices)) // Eliminate unneeded attributes from right side of a Left Existence Join. - case j @ Join(_, right, LeftExistence(_), _) => + case j @ Join(_, right, LeftExistence(_), _, _) => j.copy(right = prunedChild(right, j.references)) // all the columns will be used to compare, so we can't prune them @@ -792,7 +794,7 @@ object InferFiltersFromConstraints extends Rule[LogicalPlan] filter } - case join @ Join(left, right, joinType, conditionOpt) => + case join @ Join(left, right, joinType, conditionOpt, _) => joinType match { // For inner join, we can infer additional filters for both sides. LeftSemi is kind of an // inner join, it just drops the right side in the final output. @@ -919,7 +921,6 @@ object RemoveRedundantSorts extends Rule[LogicalPlan] { def canEliminateSort(plan: LogicalPlan): Boolean = plan match { case p: Project => p.projectList.forall(_.deterministic) case f: Filter => f.condition.deterministic - case _: ResolvedHint => true case _ => false } } @@ -1094,7 +1095,6 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper { // Note that some operators (e.g. project, aggregate, union) are being handled separately // (earlier in this rule). case _: AppendColumns => true - case _: ResolvedHint => true case _: Distinct => true case _: Generate => true case _: Pivot => true @@ -1179,7 +1179,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transform { // push the where condition down into join filter - case f @ Filter(filterCondition, Join(left, right, joinType, joinCondition)) => + case f @ Filter(filterCondition, Join(left, right, joinType, joinCondition, hint)) => val (leftFilterConditions, rightFilterConditions, commonFilterCondition) = split(splitConjunctivePredicates(filterCondition), left, right) joinType match { @@ -1193,7 +1193,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { commonFilterCondition.partition(canEvaluateWithinJoin) val newJoinCond = (newJoinConditions ++ joinCondition).reduceLeftOption(And) - val join = Join(newLeft, newRight, joinType, newJoinCond) + val join = Join(newLeft, newRight, joinType, newJoinCond, hint) if (others.nonEmpty) { Filter(others.reduceLeft(And), join) } else { @@ -1205,7 +1205,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { val newRight = rightFilterConditions. reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) val newJoinCond = joinCondition - val newJoin = Join(newLeft, newRight, RightOuter, newJoinCond) + val newJoin = Join(newLeft, newRight, RightOuter, newJoinCond, hint) (leftFilterConditions ++ commonFilterCondition). 
reduceLeftOption(And).map(Filter(_, newJoin)).getOrElse(newJoin) @@ -1215,7 +1215,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) val newRight = right val newJoinCond = joinCondition - val newJoin = Join(newLeft, newRight, joinType, newJoinCond) + val newJoin = Join(newLeft, newRight, joinType, newJoinCond, hint) (rightFilterConditions ++ commonFilterCondition). reduceLeftOption(And).map(Filter(_, newJoin)).getOrElse(newJoin) @@ -1225,7 +1225,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { } // push down the join filter into sub query scanning if applicable - case j @ Join(left, right, joinType, joinCondition) => + case j @ Join(left, right, joinType, joinCondition, hint) => val (leftJoinConditions, rightJoinConditions, commonJoinCondition) = split(joinCondition.map(splitConjunctivePredicates).getOrElse(Nil), left, right) @@ -1238,7 +1238,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) val newJoinCond = commonJoinCondition.reduceLeftOption(And) - Join(newLeft, newRight, joinType, newJoinCond) + Join(newLeft, newRight, joinType, newJoinCond, hint) case RightOuter => // push down the left side only join filter for left side sub query val newLeft = leftJoinConditions. @@ -1246,7 +1246,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { val newRight = right val newJoinCond = (rightJoinConditions ++ commonJoinCondition).reduceLeftOption(And) - Join(newLeft, newRight, RightOuter, newJoinCond) + Join(newLeft, newRight, RightOuter, newJoinCond, hint) case LeftOuter | LeftAnti | ExistenceJoin(_) => // push down the right side only join filter for right sub query val newLeft = left @@ -1254,7 +1254,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) val newJoinCond = (leftJoinConditions ++ commonJoinCondition).reduceLeftOption(And) - Join(newLeft, newRight, joinType, newJoinCond) + Join(newLeft, newRight, joinType, newJoinCond, hint) case FullOuter => j case NaturalJoin(_) => sys.error("Untransformed NaturalJoin node") case UsingJoin(_, _) => sys.error("Untransformed Using join node") @@ -1310,7 +1310,7 @@ object CheckCartesianProducts extends Rule[LogicalPlan] with PredicateHelper { if (SQLConf.get.crossJoinEnabled) { plan } else plan transform { - case j @ Join(left, right, Inner | LeftOuter | RightOuter | FullOuter, _) + case j @ Join(left, right, Inner | LeftOuter | RightOuter | FullOuter, _, _) if isCartesianProduct(j) => throw new AnalysisException( s"""Detected implicit cartesian product for ${j.joinType.sql} join between logical plans @@ -1449,7 +1449,7 @@ object ReplaceIntersectWithSemiJoin extends Rule[LogicalPlan] { case Intersect(left, right, false) => assert(left.output.size == right.output.size) val joinCond = left.output.zip(right.output).map { case (l, r) => EqualNullSafe(l, r) } - Distinct(Join(left, right, LeftSemi, joinCond.reduceLeftOption(And))) + Distinct(Join(left, right, LeftSemi, joinCond.reduceLeftOption(And), JoinHint.NONE)) } } @@ -1470,7 +1470,7 @@ object ReplaceExceptWithAntiJoin extends Rule[LogicalPlan] { case Except(left, right, false) => assert(left.output.size == right.output.size) val joinCond = left.output.zip(right.output).map { case (l, r) => EqualNullSafe(l, r) } - Distinct(Join(left, right, LeftAnti, 
joinCond.reduceLeftOption(And))) + Distinct(Join(left, right, LeftAnti, joinCond.reduceLeftOption(And), JoinHint.NONE)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala index c3fdb924243d..b19e13870aa6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala @@ -56,7 +56,7 @@ object PropagateEmptyRelation extends Rule[LogicalPlan] with PredicateHelper wit // Joins on empty LocalRelations generated from streaming sources are not eliminated // as stateful streaming joins need to perform other state management operations other than // just processing the input data. - case p @ Join(_, _, joinType, _) + case p @ Join(_, _, joinType, _, _) if !p.children.exists(_.isStreaming) => val isLeftEmpty = isEmptyLocalRelation(p.left) val isRightEmpty = isEmptyLocalRelation(p.right) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 72a60f692ac7..689915a98534 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -52,7 +52,7 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case f @ Filter(cond, _) => f.copy(condition = replaceNullWithFalse(cond)) - case j @ Join(_, _, _, Some(cond)) => j.copy(condition = Some(replaceNullWithFalse(cond))) + case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(replaceNullWithFalse(cond))) case p: LogicalPlan => p transformExpressions { case i @ If(pred, _, _) => i.copy(predicate = replaceNullWithFalse(pred)) case cw @ CaseWhen(branches, _) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 468a950fb108..39709529c00d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -600,7 +600,7 @@ object FoldablePropagation extends Rule[LogicalPlan] { // propagating the foldable expressions. // TODO(cloud-fan): It seems more reasonable to use new attributes as the output attributes // of outer join. 
- case j @ Join(left, right, joinType, _) if foldableMap.nonEmpty => + case j @ Join(left, right, joinType, _, _) if foldableMap.nonEmpty => val newJoin = j.transformExpressions(replaceFoldable) val missDerivedAttrsSet: AttributeSet = AttributeSet(joinType match { case _: InnerLike | LeftExistence(_) => Nil @@ -648,7 +648,6 @@ object FoldablePropagation extends Rule[LogicalPlan] { case _: Distinct => true case _: AppendColumns => true case _: AppendColumnsWithObject => true - case _: ResolvedHint => true case _: RepartitionByExpression => true case _: Repartition => true case _: Sort => true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala index 0b6471289a47..82aefca8a1af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala @@ -43,10 +43,13 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper { * * @param input a list of LogicalPlans to inner join and the type of inner join. * @param conditions a list of condition for join. + * @param hintMap a map of relation output attribute sets to their corresponding hints. */ @tailrec - final def createOrderedJoin(input: Seq[(LogicalPlan, InnerLike)], conditions: Seq[Expression]) - : LogicalPlan = { + final def createOrderedJoin( + input: Seq[(LogicalPlan, InnerLike)], + conditions: Seq[Expression], + hintMap: Map[AttributeSet, HintInfo]): LogicalPlan = { assert(input.size >= 2) if (input.size == 2) { val (joinConditions, others) = conditions.partition(canEvaluateWithinJoin) @@ -55,7 +58,8 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper { case (Inner, Inner) => Inner case (_, _) => Cross } - val join = Join(left, right, innerJoinType, joinConditions.reduceLeftOption(And)) + val join = Join(left, right, innerJoinType, joinConditions.reduceLeftOption(And), + JoinHint(hintMap.get(left.outputSet), hintMap.get(right.outputSet))) if (others.nonEmpty) { Filter(others.reduceLeft(And), join) } else { @@ -78,26 +82,27 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper { val joinedRefs = left.outputSet ++ right.outputSet val (joinConditions, others) = conditions.partition( e => e.references.subsetOf(joinedRefs) && canEvaluateWithinJoin(e)) - val joined = Join(left, right, innerJoinType, joinConditions.reduceLeftOption(And)) + val joined = Join(left, right, innerJoinType, joinConditions.reduceLeftOption(And), + JoinHint(hintMap.get(left.outputSet), hintMap.get(right.outputSet))) // should not have reference to same logical plan - createOrderedJoin(Seq((joined, Inner)) ++ rest.filterNot(_._1 eq right), others) + createOrderedJoin(Seq((joined, Inner)) ++ rest.filterNot(_._1 eq right), others, hintMap) } } def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case p @ ExtractFiltersAndInnerJoins(input, conditions) + case p @ ExtractFiltersAndInnerJoins(input, conditions, hintMap) if input.size > 2 && conditions.nonEmpty => val reordered = if (SQLConf.get.starSchemaDetection && !SQLConf.get.cboEnabled) { val starJoinPlan = StarSchemaDetection.reorderStarJoins(input, conditions) if (starJoinPlan.nonEmpty) { val rest = input.filterNot(starJoinPlan.contains(_)) - createOrderedJoin(starJoinPlan ++ rest, conditions) + createOrderedJoin(starJoinPlan ++ rest, conditions, hintMap) } else { - createOrderedJoin(input, conditions) + createOrderedJoin(input, 
conditions, hintMap) } } else { - createOrderedJoin(input, conditions) + createOrderedJoin(input, conditions, hintMap) } if (p.sameOutput(reordered)) { @@ -156,7 +161,7 @@ object EliminateOuterJoin extends Rule[LogicalPlan] with PredicateHelper { } def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case f @ Filter(condition, j @ Join(_, _, RightOuter | LeftOuter | FullOuter, _)) => + case f @ Filter(condition, j @ Join(_, _, RightOuter | LeftOuter | FullOuter, _, _)) => val newJoinType = buildNewJoinType(f, j) if (j.joinType == newJoinType) f else Filter(condition, j.copy(joinType = newJoinType)) } @@ -176,7 +181,7 @@ object PullOutPythonUDFInJoinCondition extends Rule[LogicalPlan] with PredicateH } override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { - case j @ Join(_, _, joinType, Some(cond)) if hasUnevaluablePythonUDF(cond, j) => + case j @ Join(_, _, joinType, Some(cond), _) if hasUnevaluablePythonUDF(cond, j) => if (!joinType.isInstanceOf[InnerLike] && joinType != LeftSemi) { // The current strategy only support InnerLike and LeftSemi join because for other type, // it breaks SQL semantic if we run the join condition as a filter after join. If we pass diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 34840c6c977a..e78ed1c3c5d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -51,7 +51,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { condition: Option[Expression]): Join = { // Deduplicate conflicting attributes if any. val dedupSubplan = dedupSubqueryOnSelfJoin(outerPlan, subplan, None, condition) - Join(outerPlan, dedupSubplan, joinType, condition) + Join(outerPlan, dedupSubplan, joinType, condition, JoinHint.NONE) } private def dedupSubqueryOnSelfJoin( @@ -116,7 +116,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { val newSub = dedupSubqueryOnSelfJoin(p, sub, Some(values)) val inConditions = values.zip(newSub.output).map(EqualTo.tupled) val (joinCond, outerPlan) = rewriteExistentialExpr(inConditions ++ conditions, p) - Join(outerPlan, newSub, LeftSemi, joinCond) + Join(outerPlan, newSub, LeftSemi, joinCond, JoinHint.NONE) case (p, Not(InSubquery(values, ListQuery(sub, conditions, _, _)))) => // This is a NULL-aware (left) anti join (NAAJ) e.g. col NOT IN expr // Construct the condition. 
A NULL in one of the conditions is regarded as a positive @@ -142,7 +142,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { // will have the final conditions in the LEFT ANTI as // (A.A1 = B.B1 OR ISNULL(A.A1 = B.B1)) AND (B.B2 = A.A2) AND B.B3 > 1 val finalJoinCond = (nullAwareJoinConds ++ conditions).reduceLeft(And) - Join(outerPlan, newSub, LeftAnti, Option(finalJoinCond)) + Join(outerPlan, newSub, LeftAnti, Option(finalJoinCond), JoinHint.NONE) case (p, predicate) => val (newCond, inputPlan) = rewriteExistentialExpr(Seq(predicate), p) Project(p.output, Filter(newCond.get, inputPlan)) @@ -172,7 +172,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { val newSub = dedupSubqueryOnSelfJoin(newPlan, sub, Some(values)) val inConditions = values.zip(newSub.output).map(EqualTo.tupled) val newConditions = (inConditions ++ conditions).reduceLeftOption(And) - newPlan = Join(newPlan, newSub, ExistenceJoin(exists), newConditions) + newPlan = Join(newPlan, newSub, ExistenceJoin(exists), newConditions, JoinHint.NONE) exists } } @@ -450,7 +450,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { // CASE 1: Subquery guaranteed not to have the COUNT bug Project( currentChild.output :+ origOutput, - Join(currentChild, query, LeftOuter, conditions.reduceOption(And))) + Join(currentChild, query, LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) } else { // Subquery might have the COUNT bug. Add appropriate corrections. val (topPart, havingNode, aggNode) = splitSubquery(query) @@ -477,7 +477,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { aggValRef), origOutput.name)(exprId = origOutput.exprId), Join(currentChild, Project(query.output :+ alwaysTrueExpr, query), - LeftOuter, conditions.reduceOption(And))) + LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) } else { // CASE 3: Subquery with HAVING clause. Pull the HAVING clause above the join. 
@@ -507,7 +507,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { currentChild.output :+ caseExpr, Join(currentChild, Project(subqueryRoot.output :+ alwaysTrueExpr, subqueryRoot), - LeftOuter, conditions.reduceOption(And))) + LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 8959f78b656d..a27c6d3c3671 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -515,7 +515,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging override def visitFromClause(ctx: FromClauseContext): LogicalPlan = withOrigin(ctx) { val from = ctx.relation.asScala.foldLeft(null: LogicalPlan) { (left, relation) => val right = plan(relation.relationPrimary) - val join = right.optionalMap(left)(Join(_, _, Inner, None)) + val join = right.optionalMap(left)(Join(_, _, Inner, None, JoinHint.NONE)) withJoinRelations(join, relation) } if (ctx.pivotClause() != null) { @@ -727,7 +727,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging case None => (baseJoinType, None) } - Join(left, plan(join.right), joinType, condition) + Join(left, plan(join.right), joinType, condition, JoinHint.NONE) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index 84be677e438a..dfc3b2d22129 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.planning +import scala.collection.mutable + import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ @@ -98,12 +100,13 @@ object PhysicalOperation extends PredicateHelper { * value). */ object ExtractEquiJoinKeys extends Logging with PredicateHelper { - /** (joinType, leftKeys, rightKeys, condition, leftChild, rightChild) */ + /** (joinType, leftKeys, rightKeys, condition, leftChild, rightChild, joinHint) */ type ReturnType = - (JoinType, Seq[Expression], Seq[Expression], Option[Expression], LogicalPlan, LogicalPlan) + (JoinType, Seq[Expression], Seq[Expression], + Option[Expression], LogicalPlan, LogicalPlan, JoinHint) def unapply(plan: LogicalPlan): Option[ReturnType] = plan match { - case join @ Join(left, right, joinType, condition) => + case join @ Join(left, right, joinType, condition, hint) => logDebug(s"Considering join on: $condition") // Find equi-join predicates that can be evaluated before the join, and thus can be used // as join keys. @@ -133,7 +136,7 @@ object ExtractEquiJoinKeys extends Logging with PredicateHelper { if (joinKeys.nonEmpty) { val (leftKeys, rightKeys) = joinKeys.unzip logDebug(s"leftKeys:$leftKeys | rightKeys:$rightKeys") - Some((joinType, leftKeys, rightKeys, otherPredicates.reduceOption(And), left, right)) + Some((joinType, leftKeys, rightKeys, otherPredicates.reduceOption(And), left, right, hint)) } else { None } @@ -164,25 +167,35 @@ object ExtractFiltersAndInnerJoins extends PredicateHelper { * was involved in an explicit cross join. 
Also returns the entire list of join conditions for * the left-deep tree. */ - def flattenJoin(plan: LogicalPlan, parentJoinType: InnerLike = Inner) + def flattenJoin( + plan: LogicalPlan, + hintMap: mutable.HashMap[AttributeSet, HintInfo], + parentJoinType: InnerLike = Inner) : (Seq[(LogicalPlan, InnerLike)], Seq[Expression]) = plan match { - case Join(left, right, joinType: InnerLike, cond) => - val (plans, conditions) = flattenJoin(left, joinType) + case Join(left, right, joinType: InnerLike, cond, hint) => + val (plans, conditions) = flattenJoin(left, hintMap, joinType) + hint.leftHint.map(hintMap.put(left.outputSet, _)) + hint.rightHint.map(hintMap.put(right.outputSet, _)) (plans ++ Seq((right, joinType)), conditions ++ cond.toSeq.flatMap(splitConjunctivePredicates)) - case Filter(filterCondition, j @ Join(left, right, _: InnerLike, joinCondition)) => - val (plans, conditions) = flattenJoin(j) + case Filter(filterCondition, j @ Join(_, _, _: InnerLike, _, _)) => + val (plans, conditions) = flattenJoin(j, hintMap) (plans, conditions ++ splitConjunctivePredicates(filterCondition)) case _ => (Seq((plan, parentJoinType)), Seq.empty) } - def unapply(plan: LogicalPlan): Option[(Seq[(LogicalPlan, InnerLike)], Seq[Expression])] + def unapply(plan: LogicalPlan) + : Option[(Seq[(LogicalPlan, InnerLike)], Seq[Expression], Map[AttributeSet, HintInfo])] = plan match { - case f @ Filter(filterCondition, j @ Join(_, _, joinType: InnerLike, _)) => - Some(flattenJoin(f)) - case j @ Join(_, _, joinType, _) => - Some(flattenJoin(j)) + case f @ Filter(filterCondition, j @ Join(_, _, joinType: InnerLike, _, _)) => + val hintMap = new mutable.HashMap[AttributeSet, HintInfo] + val flattened = flattenJoin(f, hintMap) + Some((flattened._1, flattened._2, hintMap.toMap)) + case j @ Join(_, _, joinType, _, _) => + val hintMap = new mutable.HashMap[AttributeSet, HintInfo] + val flattened = flattenJoin(j, hintMap) + Some((flattened._1, flattened._2, hintMap.toMap)) case _ => None } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala index 2c248d74869c..18baced8f3d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala @@ -37,7 +37,6 @@ trait LogicalPlanVisitor[T] { case p: Project => visitProject(p) case p: Repartition => visitRepartition(p) case p: RepartitionByExpression => visitRepartitionByExpr(p) - case p: ResolvedHint => visitHint(p) case p: Sample => visitSample(p) case p: ScriptTransformation => visitScriptTransform(p) case p: Union => visitUnion(p) @@ -61,8 +60,6 @@ trait LogicalPlanVisitor[T] { def visitGlobalLimit(p: GlobalLimit): T - def visitHint(p: ResolvedHint): T - def visitIntersect(p: Intersect): T def visitJoin(p: Join): T diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala index b3a48860aa63..5a388117a6c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala @@ -52,13 +52,11 @@ import org.apache.spark.util.Utils * defaults to the product of children's `sizeInBytes`. 
* @param rowCount Estimated number of rows. * @param attributeStats Statistics for Attributes. - * @param hints Query hints. */ case class Statistics( sizeInBytes: BigInt, rowCount: Option[BigInt] = None, - attributeStats: AttributeMap[ColumnStat] = AttributeMap(Nil), - hints: HintInfo = HintInfo()) { + attributeStats: AttributeMap[ColumnStat] = AttributeMap(Nil)) { override def toString: String = "Statistics(" + simpleString + ")" @@ -70,8 +68,7 @@ case class Statistics( s"rowCount=${BigDecimal(rowCount.get, new MathContext(3, RoundingMode.HALF_UP)).toString()}" } else { "" - }, - s"hints=$hints" + } ).filter(_.nonEmpty).mkString(", ") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index d8b3a4af4f7b..639d68f4ecd7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -288,7 +288,8 @@ case class Join( left: LogicalPlan, right: LogicalPlan, joinType: JoinType, - condition: Option[Expression]) + condition: Option[Expression], + hint: JoinHint) extends BinaryNode with PredicateHelper { override def output: Seq[Attribute] = { @@ -350,6 +351,17 @@ case class Join( case UsingJoin(_, _) => false case _ => resolvedExceptNatural } + + // Ignore hint for canonicalization + protected override def doCanonicalize(): LogicalPlan = + super.doCanonicalize().asInstanceOf[Join].copy(hint = JoinHint.NONE) + + // Do not include an empty join hint in string description + protected override def stringArgs: Iterator[Any] = super.stringArgs.filter { e => + (!e.isInstanceOf[JoinHint] + || e.asInstanceOf[JoinHint].leftHint.isDefined + || e.asInstanceOf[JoinHint].rightHint.isDefined) + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala index cbb626590d1d..b2ba725e9d44 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala @@ -35,6 +35,7 @@ case class UnresolvedHint(name: String, parameters: Seq[Any], child: LogicalPlan /** * A resolved hint node. The analyzer should convert all [[UnresolvedHint]] into [[ResolvedHint]]. + * This node will be eliminated before optimization starts. */ case class ResolvedHint(child: LogicalPlan, hints: HintInfo = HintInfo()) extends UnaryNode { @@ -44,11 +45,31 @@ case class ResolvedHint(child: LogicalPlan, hints: HintInfo = HintInfo()) override def doCanonicalize(): LogicalPlan = child.canonicalized } +/** + * Hint that is associated with a [[Join]] node, with [[HintInfo]] on its left child and on its + * right child respectively. + */ +case class JoinHint(leftHint: Option[HintInfo], rightHint: Option[HintInfo]) { -case class HintInfo(broadcast: Boolean = false) { + override def toString: String = { + Seq( + leftHint.map("leftHint=" + _), + rightHint.map("rightHint=" + _)) + .filter(_.isDefined).map(_.get).mkString(", ") + } +} - /** Must be called when computing stats for a join operator to reset hints. */ - def resetForJoin(): HintInfo = copy(broadcast = false) +object JoinHint { + val NONE = JoinHint(None, None) +} + +/** + * The hint attributes to be applied on a specific node. 
+ * + * @param broadcast If set to true, it indicates that the broadcast hash join is the preferred join + * strategy and the node with this hint is preferred to be the build side. + */ +case class HintInfo(broadcast: Boolean = false) { override def toString: String = { val hints = scala.collection.mutable.ArrayBuffer.empty[String] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/AggregateEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/AggregateEstimation.scala index 111c594a53e5..eb56ab43ea9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/AggregateEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/AggregateEstimation.scala @@ -56,8 +56,7 @@ object AggregateEstimation { Some(Statistics( sizeInBytes = getOutputSize(agg.output, outputRows, outputAttrStats), rowCount = Some(outputRows), - attributeStats = outputAttrStats, - hints = childStats.hints)) + attributeStats = outputAttrStats)) } else { None } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala index b6c16079d198..b8c652dc8f12 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala @@ -47,8 +47,6 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { override def visitGlobalLimit(p: GlobalLimit): Statistics = fallback(p) - override def visitHint(p: ResolvedHint): Statistics = fallback(p) - override def visitIntersect(p: Intersect): Statistics = fallback(p) override def visitJoin(p: Join): Statistics = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala index 2543e38a92c0..19a0d1279cc3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala @@ -56,7 +56,7 @@ case class JoinEstimation(join: Join) extends Logging { case _ if !rowCountsExist(join.left, join.right) => None - case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, _, _, _) => + case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, _, _, _, _) => // 1. 
Compute join selectivity val joinKeyPairs = extractJoinKeysWithColStats(leftKeys, rightKeys) val (numInnerJoinedRows, keyStatsAfterJoin) = computeCardinalityAndStats(joinKeyPairs) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala index ee43f9126386..da36db7ae1f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala @@ -44,7 +44,7 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { } // Don't propagate rowCount and attributeStats, since they are not estimated here. - Statistics(sizeInBytes = sizeInBytes, hints = p.child.stats.hints) + Statistics(sizeInBytes = sizeInBytes) } /** @@ -60,8 +60,7 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { if (p.groupingExpressions.isEmpty) { Statistics( sizeInBytes = EstimationUtils.getOutputSize(p.output, outputRowCount = 1), - rowCount = Some(1), - hints = p.child.stats.hints) + rowCount = Some(1)) } else { visitUnaryNode(p) } @@ -87,19 +86,15 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { // Don't propagate column stats, because we don't know the distribution after limit Statistics( sizeInBytes = EstimationUtils.getOutputSize(p.output, rowCount, childStats.attributeStats), - rowCount = Some(rowCount), - hints = childStats.hints) + rowCount = Some(rowCount)) } - override def visitHint(p: ResolvedHint): Statistics = p.child.stats.copy(hints = p.hints) - override def visitIntersect(p: Intersect): Statistics = { val leftSize = p.left.stats.sizeInBytes val rightSize = p.right.stats.sizeInBytes val sizeInBytes = if (leftSize < rightSize) leftSize else rightSize Statistics( - sizeInBytes = sizeInBytes, - hints = p.left.stats.hints.resetForJoin()) + sizeInBytes = sizeInBytes) } override def visitJoin(p: Join): Statistics = { @@ -108,10 +103,7 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { // LeftSemi and LeftAnti won't ever be bigger than left p.left.stats case _ => - // Make sure we don't propagate isBroadcastable in other joins, because - // they could explode the size. - val stats = default(p) - stats.copy(hints = stats.hints.resetForJoin()) + default(p) } } @@ -121,7 +113,7 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { if (limit == 0) { // sizeInBytes can't be zero, or sizeInBytes of BinaryNode will also be zero // (product of children). - Statistics(sizeInBytes = 1, rowCount = Some(0), hints = childStats.hints) + Statistics(sizeInBytes = 1, rowCount = Some(0)) } else { // The output row count of LocalLimit should be the sum of row counts from each partition. 
// However, since the number of partitions is not available here, we just use statistics of @@ -147,7 +139,7 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { } val sampleRows = p.child.stats.rowCount.map(c => EstimationUtils.ceil(BigDecimal(c) * ratio)) // Don't propagate column stats, because we don't know the distribution after a sample operation - Statistics(sizeInBytes, sampleRows, hints = p.child.stats.hints) + Statistics(sizeInBytes, sampleRows) } override def visitScriptTransform(p: ScriptTransformation): Statistics = default(p) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 117e96175e92..129ce3b1105e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -443,7 +443,7 @@ class AnalysisErrorSuite extends AnalysisTest { } test("error test for self-join") { - val join = Join(testRelation, testRelation, Cross, None) + val join = Join(testRelation, testRelation, Cross, None, JoinHint.NONE) val error = intercept[AnalysisException] { SimpleAnalyzer.checkAnalysis(join) } @@ -565,7 +565,8 @@ class AnalysisErrorSuite extends AnalysisTest { LocalRelation(b), Filter(EqualTo(UnresolvedAttribute("a"), c), LocalRelation(c)), LeftOuter, - Option(EqualTo(b, c)))), + Option(EqualTo(b, c)), + JoinHint.NONE)), LocalRelation(a)) assertAnalysisError(plan1, "Accessing outer query column is not allowed in" :: Nil) @@ -575,7 +576,8 @@ class AnalysisErrorSuite extends AnalysisTest { Filter(EqualTo(UnresolvedAttribute("a"), c), LocalRelation(c)), LocalRelation(b), RightOuter, - Option(EqualTo(b, c)))), + Option(EqualTo(b, c)), + JoinHint.NONE)), LocalRelation(a)) assertAnalysisError(plan2, "Accessing outer query column is not allowed in" :: Nil) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index da3ae72c3682..982948483fa1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -397,7 +397,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { Join( Project(Seq($"x.key"), SubqueryAlias("x", input)), Project(Seq($"y.key"), SubqueryAlias("y", input)), - Cross, None)) + Cross, None, JoinHint.NONE)) assertAnalysisSuccess(query) } @@ -578,7 +578,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { Seq(UnresolvedAttribute("a")), pythonUdf, output, project) val left = SubqueryAlias("temp0", flatMapGroupsInPandas) val right = SubqueryAlias("temp1", flatMapGroupsInPandas) - val join = Join(left, right, Inner, None) + val join = Join(left, right, Inner, None, JoinHint.NONE) assertAnalysisSuccess( Project(Seq(UnresolvedAttribute("temp0.a"), UnresolvedAttribute("temp1.a")), join)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveHintsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveHintsSuite.scala index bd66ee5355f4..563e8adf87ed 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveHintsSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveHintsSuite.scala @@ -60,7 +60,7 @@ class ResolveHintsSuite extends AnalysisTest { checkAnalysis( UnresolvedHint("MAPJOIN", Seq("table", "table2"), table("table").join(table("table2"))), Join(ResolvedHint(testRelation, HintInfo(broadcast = true)), - ResolvedHint(testRelation2, HintInfo(broadcast = true)), Inner, None), + ResolvedHint(testRelation2, HintInfo(broadcast = true)), Inner, None, JoinHint.NONE), caseSensitive = false) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala index 57195d5fda7c..0cd6e092e203 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala @@ -353,15 +353,15 @@ class ColumnPruningSuite extends PlanTest { Project(Seq($"x.key", $"y.key"), Join( SubqueryAlias("x", input), - ResolvedHint(SubqueryAlias("y", input)), Inner, None)).analyze + SubqueryAlias("y", input), Inner, None, JoinHint.NONE)).analyze val optimized = Optimize.execute(query) val expected = Join( Project(Seq($"x.key"), SubqueryAlias("x", input)), - ResolvedHint(Project(Seq($"y.key"), SubqueryAlias("y", input))), - Inner, None).analyze + Project(Seq($"y.key"), SubqueryAlias("y", input)), + Inner, None, JoinHint.NONE).analyze comparePlans(optimized, expected) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index 82a10254d846..cf4e9fcea2c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -822,19 +821,6 @@ class FilterPushdownSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - test("broadcast hint") { - val originalQuery = ResolvedHint(testRelation) - .where('a === 2L && 'b + Rand(10).as("rnd") === 3) - - val optimized = Optimize.execute(originalQuery.analyze) - - val correctAnswer = ResolvedHint(testRelation.where('a === 2L)) - .where('b + Rand(10).as("rnd") === 3) - .analyze - - comparePlans(optimized, correctAnswer) - } - test("union") { val testRelation2 = LocalRelation('d.int, 'e.int, 'f.int) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala index 6fe5e619d03a..9093d7fecb0f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala @@ -65,7 +65,8 @@ class JoinOptimizationSuite extends PlanTest { def testExtractCheckCross (plan: LogicalPlan, expected: Option[(Seq[(LogicalPlan, InnerLike)], Seq[Expression])]) { - assert(ExtractFiltersAndInnerJoins.unapply(plan) === expected) + assert( + 
ExtractFiltersAndInnerJoins.unapply(plan) === expected.map(e => (e._1, e._2, Map.empty))) } testExtract(x, None) @@ -124,29 +125,4 @@ class JoinOptimizationSuite extends PlanTest { comparePlans(optimized, queryAnswerPair._2.analyze) } } - - test("broadcasthint sets relation statistics to smallest value") { - val input = LocalRelation('key.int, 'value.string) - - val query = - Project(Seq($"x.key", $"y.key"), - Join( - SubqueryAlias("x", input), - ResolvedHint(SubqueryAlias("y", input)), Cross, None)).analyze - - val optimized = Optimize.execute(query) - - val expected = - Join( - Project(Seq($"x.key"), SubqueryAlias("x", input)), - ResolvedHint(Project(Seq($"y.key"), SubqueryAlias("y", input))), - Cross, None).analyze - - comparePlans(optimized, expected) - - val broadcastChildren = optimized.collect { - case Join(_, r, _, _) if r.stats.sizeInBytes == 1 => r - } - assert(broadcastChildren.size == 1) - } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala index c94a8b9e318f..0dee84620586 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala @@ -31,6 +31,8 @@ class JoinReorderSuite extends PlanTest with StatsEstimationTestBase { object Optimize extends RuleExecutor[LogicalPlan] { val batches = + Batch("Resolve Hints", Once, + EliminateResolvedHint) :: Batch("Operator Optimizations", FixedPoint(100), CombineFilters, PushDownPredicate, @@ -42,6 +44,12 @@ class JoinReorderSuite extends PlanTest with StatsEstimationTestBase { CostBasedJoinReorder) :: Nil } + object ResolveHints extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Resolve Hints", Once, + EliminateResolvedHint) :: Nil + } + var originalConfCBOEnabled = false var originalConfJoinReorderEnabled = false @@ -284,12 +292,85 @@ class JoinReorderSuite extends PlanTest with StatsEstimationTestBase { assertEqualPlans(originalPlan, bestPlan) } + test("hints preservation") { + // Apply hints if we find an equivalent node in the new plan, otherwise discard them. 
+ val originalPlan = + t1.join(t2.hint("broadcast")).hint("broadcast").join(t4.join(t3).hint("broadcast")) + .where((nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2")) && + (nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5")) && + (nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))) + + val bestPlan = + t1.join(t2.hint("broadcast"), Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5"))) + .hint("broadcast") + .join( + t4.join(t3, Inner, Some(nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))) + .hint("broadcast"), + Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2"))) + + assertEqualPlans(originalPlan, bestPlan) + + val originalPlan2 = + t1.join(t2).hint("broadcast").join(t3).hint("broadcast").join(t4.hint("broadcast")) + .where((nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2")) && + (nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5")) && + (nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))) + + val bestPlan2 = + t1.join(t2, Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5"))) + .hint("broadcast") + .join( + t4.hint("broadcast") + .join(t3, Inner, Some(nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))), + Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2"))) + .select(outputsOf(t1, t2, t3, t4): _*) + + assertEqualPlans(originalPlan2, bestPlan2) + + val originalPlan3 = + t1.join(t4).hint("broadcast") + .join(t2.hint("broadcast")).hint("broadcast") + .join(t3.hint("broadcast")) + .where((nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2")) && + (nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5")) && + (nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))) + + val bestPlan3 = + t1.join(t2.hint("broadcast"), Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5"))) + .join( + t4.join(t3.hint("broadcast"), + Inner, Some(nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))), + Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2"))) + .select(outputsOf(t1, t4, t2, t3): _*) + + assertEqualPlans(originalPlan3, bestPlan3) + + val originalPlan4 = + t2.hint("broadcast") + .join(t4).hint("broadcast") + .join(t3.hint("broadcast")).hint("broadcast") + .join(t1) + .where((nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2")) && + (nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5")) && + (nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))) + + val bestPlan4 = + t1.join(t2.hint("broadcast"), Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5"))) + .join( + t4.join(t3.hint("broadcast"), + Inner, Some(nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))), + Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2"))) + .select(outputsOf(t2, t4, t3, t1): _*) + + assertEqualPlans(originalPlan4, bestPlan4) + } + private def assertEqualPlans( originalPlan: LogicalPlan, groundTruthBestPlan: LogicalPlan): Unit = { val analyzed = originalPlan.analyze val optimized = Optimize.execute(analyzed) - val expected = groundTruthBestPlan.analyze + val expected = ResolveHints.execute(groundTruthBestPlan.analyze) assert(analyzed.sameOutput(expected)) // if this fails, the expected plan itself is incorrect assert(analyzed.sameOutput(optimized)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala index c8e15c7da763..6d1af12e68b2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala @@ -48,7 +48,7 @@ class ReplaceOperatorSuite extends PlanTest { val correctAnswer = Aggregate(table1.output, table1.output, - Join(table1, table2, LeftSemi, Option('a <=> 'c && 'b <=> 'd))).analyze + Join(table1, table2, LeftSemi, Option('a <=> 'c && 'b <=> 'd), JoinHint.NONE)).analyze comparePlans(optimized, correctAnswer) } @@ -160,7 +160,7 @@ class ReplaceOperatorSuite extends PlanTest { val correctAnswer = Aggregate(table1.output, table1.output, - Join(table1, table2, LeftAnti, Option('a <=> 'c && 'b <=> 'd))).analyze + Join(table1, table2, LeftAnti, Option('a <=> 'c && 'b <=> 'd), JoinHint.NONE)).analyze comparePlans(optimized, correctAnswer) } @@ -175,7 +175,7 @@ class ReplaceOperatorSuite extends PlanTest { val correctAnswer = Aggregate(left.output, right.output, - Join(left, right, LeftAnti, Option($"left.a" <=> $"right.a"))).analyze + Join(left, right, LeftAnti, Option($"left.a" <=> $"right.a"), JoinHint.NONE)).analyze comparePlans(optimized, correctAnswer) } @@ -248,7 +248,7 @@ class ReplaceOperatorSuite extends PlanTest { val condition = basePlan.output.zip(otherPlan.output).map { case (a1, a2) => a1 <=> a2 }.reduce( _ && _) val correctAnswer = Aggregate(basePlan.output, otherPlan.output, - Join(basePlan, otherPlan, LeftAnti, Option(condition))).analyze + Join(basePlan, otherPlan, LeftAnti, Option(condition), JoinHint.NONE)).analyze comparePlans(result, correctAnswer) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala index 3081ff935f04..5394732f41f2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala @@ -99,11 +99,11 @@ trait PlanTestBase extends PredicateHelper with SQLHelper { self: Suite => .reduce(And), child) case sample: Sample => sample.copy(seed = 0L) - case Join(left, right, joinType, condition) if condition.isDefined => + case Join(left, right, joinType, condition, hint) if condition.isDefined => val newCondition = splitConjunctivePredicates(condition.get).map(rewriteEqual).sortBy(_.hashCode()) .reduce(And) - Join(left, right, joinType, Some(newCondition)) + Join(left, right, joinType, Some(newCondition), hint) } } @@ -165,8 +165,10 @@ trait PlanTestBase extends PredicateHelper with SQLHelper { self: Suite => private def sameJoinPlan(plan1: LogicalPlan, plan2: LogicalPlan): Boolean = { (plan1, plan2) match { case (j1: Join, j2: Join) => - (sameJoinPlan(j1.left, j2.left) && sameJoinPlan(j1.right, j2.right)) || - (sameJoinPlan(j1.left, j2.right) && sameJoinPlan(j1.right, j2.left)) + (sameJoinPlan(j1.left, j2.left) && sameJoinPlan(j1.right, j2.right) + && j1.hint.leftHint == j2.hint.leftHint && j1.hint.rightHint == j2.hint.rightHint) || + (sameJoinPlan(j1.left, j2.right) && sameJoinPlan(j1.right, j2.left) + && j1.hint.leftHint == j2.hint.rightHint && j1.hint.rightHint == j2.hint.leftHint) case (p1: Project, p2: Project) => p1.projectList == p2.projectList && sameJoinPlan(p1.child, p2.child) case _ => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala index 7c8ed78a4911..fbaaf807af5d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala @@ -20,7 +20,9 @@ package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, ResolvedHint, Union} +import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util._ /** @@ -30,6 +32,10 @@ class SameResultSuite extends SparkFunSuite { val testRelation = LocalRelation('a.int, 'b.int, 'c.int) val testRelation2 = LocalRelation('a.int, 'b.int, 'c.int) + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("EliminateResolvedHint", Once, EliminateResolvedHint) :: Nil + } + def assertSameResult(a: LogicalPlan, b: LogicalPlan, result: Boolean = true): Unit = { val aAnalyzed = a.analyze val bAnalyzed = b.analyze @@ -72,4 +78,12 @@ class SameResultSuite extends SparkFunSuite { val df2 = testRelation.join(testRelation) assertSameResult(df1, df2) } + + test("join hint") { + val df1 = testRelation.join(testRelation.hint("broadcast")) + val df2 = testRelation.join(testRelation) + val df1Optimized = Optimize.execute(df1.analyze) + val df2Optimized = Optimize.execute(df2.analyze) + assertSameResult(df1Optimized, df2Optimized) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 953094cb0dd5..16a5c2d3001a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -38,24 +38,6 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { // row count * (overhead + column size) size = Some(10 * (8 + 4))) - test("BroadcastHint estimation") { - val filter = Filter(Literal(true), plan) - val filterStatsCboOn = Statistics(sizeInBytes = 10 * (8 +4), - rowCount = Some(10), attributeStats = AttributeMap(Seq(attribute -> colStat))) - val filterStatsCboOff = Statistics(sizeInBytes = 10 * (8 +4)) - checkStats( - filter, - expectedStatsCboOn = filterStatsCboOn, - expectedStatsCboOff = filterStatsCboOff) - - val broadcastHint = ResolvedHint(filter, HintInfo(broadcast = true)) - checkStats( - broadcastHint, - expectedStatsCboOn = filterStatsCboOn.copy(hints = HintInfo(broadcast = true)), - expectedStatsCboOff = filterStatsCboOff.copy(hints = HintInfo(broadcast = true)) - ) - } - test("range") { val range = Range(1, 5, 1, None) val rangeStats = Statistics(sizeInBytes = 4 * 8) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala index b0a47e783512..1cf888519077 100755 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala @@ -528,7 +528,7 @@ class FilterEstimationSuite extends StatsEstimationTestBase { rowCount = 30, attributeStats = 
AttributeMap(Seq(attrIntLargerRange -> colStatIntLargerRange))) val nonLeafChild = Join(largerTable, smallerTable, LeftOuter, - Some(EqualTo(attrIntLargerRange, attrInt))) + Some(EqualTo(attrIntLargerRange, attrInt)), JoinHint.NONE) Seq(IsNull(attrIntLargerRange), IsNotNull(attrIntLargerRange)).foreach { predicate => validateEstimatedStats( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala index 12c0a7be2129..6c5a2b247fc2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala @@ -79,8 +79,8 @@ class JoinEstimationSuite extends StatsEstimationTestBase { val c1 = generateJoinChild(col1, leftHistogram, expectedMin, expectedMax) val c2 = generateJoinChild(col2, rightHistogram, expectedMin, expectedMax) - val c1JoinC2 = Join(c1, c2, Inner, Some(EqualTo(col1, col2))) - val c2JoinC1 = Join(c2, c1, Inner, Some(EqualTo(col2, col1))) + val c1JoinC2 = Join(c1, c2, Inner, Some(EqualTo(col1, col2)), JoinHint.NONE) + val c2JoinC1 = Join(c2, c1, Inner, Some(EqualTo(col2, col1)), JoinHint.NONE) val expectedStatsAfterJoin = Statistics( sizeInBytes = expectedRows * (8 + 2 * 4), rowCount = Some(expectedRows), @@ -284,7 +284,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { test("cross join") { // table1 (key-1-5 int, key-5-9 int): (1, 9), (2, 8), (3, 7), (4, 6), (5, 5) // table2 (key-1-2 int, key-2-4 int): (1, 2), (2, 3), (2, 4) - val join = Join(table1, table2, Cross, None) + val join = Join(table1, table2, Cross, None, JoinHint.NONE) val expectedStats = Statistics( sizeInBytes = 5 * 3 * (8 + 4 * 4), rowCount = Some(5 * 3), @@ -299,7 +299,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { // table2 (key-1-2 int, key-2-4 int): (1, 2), (2, 3), (2, 4) // key-5-9 and key-2-4 are disjoint val join = Join(table1, table2, Inner, - Some(EqualTo(nameToAttr("key-5-9"), nameToAttr("key-2-4")))) + Some(EqualTo(nameToAttr("key-5-9"), nameToAttr("key-2-4"))), JoinHint.NONE) val expectedStats = Statistics( sizeInBytes = 1, rowCount = Some(0), @@ -312,7 +312,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { // table2 (key-1-2 int, key-2-4 int): (1, 2), (2, 3), (2, 4) // key-5-9 and key-2-4 are disjoint val join = Join(table1, table2, LeftOuter, - Some(EqualTo(nameToAttr("key-5-9"), nameToAttr("key-2-4")))) + Some(EqualTo(nameToAttr("key-5-9"), nameToAttr("key-2-4"))), JoinHint.NONE) val expectedStats = Statistics( sizeInBytes = 5 * (8 + 4 * 4), rowCount = Some(5), @@ -328,7 +328,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { // table2 (key-1-2 int, key-2-4 int): (1, 2), (2, 3), (2, 4) // key-5-9 and key-2-4 are disjoint val join = Join(table1, table2, RightOuter, - Some(EqualTo(nameToAttr("key-5-9"), nameToAttr("key-2-4")))) + Some(EqualTo(nameToAttr("key-5-9"), nameToAttr("key-2-4"))), JoinHint.NONE) val expectedStats = Statistics( sizeInBytes = 3 * (8 + 4 * 4), rowCount = Some(3), @@ -344,7 +344,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { // table2 (key-1-2 int, key-2-4 int): (1, 2), (2, 3), (2, 4) // key-5-9 and key-2-4 are disjoint val join = Join(table1, table2, FullOuter, - Some(EqualTo(nameToAttr("key-5-9"), nameToAttr("key-2-4")))) + Some(EqualTo(nameToAttr("key-5-9"), nameToAttr("key-2-4"))), JoinHint.NONE) 
val expectedStats = Statistics( sizeInBytes = (5 + 3) * (8 + 4 * 4), rowCount = Some(5 + 3), @@ -361,7 +361,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { // table1 (key-1-5 int, key-5-9 int): (1, 9), (2, 8), (3, 7), (4, 6), (5, 5) // table2 (key-1-2 int, key-2-4 int): (1, 2), (2, 3), (2, 4) val join = Join(table1, table2, Inner, - Some(EqualTo(nameToAttr("key-1-5"), nameToAttr("key-1-2")))) + Some(EqualTo(nameToAttr("key-1-5"), nameToAttr("key-1-2"))), JoinHint.NONE) // Update column stats for equi-join keys (key-1-5 and key-1-2). val joinedColStat = ColumnStat(distinctCount = Some(2), min = Some(1), max = Some(2), nullCount = Some(0), avgLen = Some(4), maxLen = Some(4)) @@ -383,7 +383,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { // table3 (key-1-2 int, key-2-3 int): (1, 2), (2, 3) val join = Join(table2, table3, Inner, Some( And(EqualTo(nameToAttr("key-1-2"), nameToAttr("key-1-2")), - EqualTo(nameToAttr("key-2-4"), nameToAttr("key-2-3"))))) + EqualTo(nameToAttr("key-2-4"), nameToAttr("key-2-3")))), JoinHint.NONE) // Update column stats for join keys. val joinedColStat1 = ColumnStat(distinctCount = Some(2), min = Some(1), max = Some(2), @@ -404,7 +404,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { // table2 (key-1-2 int, key-2-4 int): (1, 2), (2, 3), (2, 4) // table3 (key-1-2 int, key-2-3 int): (1, 2), (2, 3) val join = Join(table3, table2, LeftOuter, - Some(EqualTo(nameToAttr("key-2-3"), nameToAttr("key-2-4")))) + Some(EqualTo(nameToAttr("key-2-3"), nameToAttr("key-2-4"))), JoinHint.NONE) val joinedColStat = ColumnStat(distinctCount = Some(2), min = Some(2), max = Some(3), nullCount = Some(0), avgLen = Some(4), maxLen = Some(4)) @@ -422,7 +422,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { // table2 (key-1-2 int, key-2-4 int): (1, 2), (2, 3), (2, 4) // table3 (key-1-2 int, key-2-3 int): (1, 2), (2, 3) val join = Join(table2, table3, RightOuter, - Some(EqualTo(nameToAttr("key-2-4"), nameToAttr("key-2-3")))) + Some(EqualTo(nameToAttr("key-2-4"), nameToAttr("key-2-3"))), JoinHint.NONE) val joinedColStat = ColumnStat(distinctCount = Some(2), min = Some(2), max = Some(3), nullCount = Some(0), avgLen = Some(4), maxLen = Some(4)) @@ -440,7 +440,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { // table2 (key-1-2 int, key-2-4 int): (1, 2), (2, 3), (2, 4) // table3 (key-1-2 int, key-2-3 int): (1, 2), (2, 3) val join = Join(table2, table3, FullOuter, - Some(EqualTo(nameToAttr("key-2-4"), nameToAttr("key-2-3")))) + Some(EqualTo(nameToAttr("key-2-4"), nameToAttr("key-2-3"))), JoinHint.NONE) val expectedStats = Statistics( sizeInBytes = 3 * (8 + 4 * 4), @@ -456,7 +456,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { // table3 (key-1-2 int, key-2-3 int): (1, 2), (2, 3) Seq(LeftSemi, LeftAnti).foreach { jt => val join = Join(table2, table3, jt, - Some(EqualTo(nameToAttr("key-2-4"), nameToAttr("key-2-3")))) + Some(EqualTo(nameToAttr("key-2-4"), nameToAttr("key-2-3"))), JoinHint.NONE) // For now we just propagate the statistics from left side for left semi/anti join. val expectedStats = Statistics( sizeInBytes = 3 * (8 + 4 * 2), @@ -525,7 +525,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase { withClue(s"For data type ${key1.dataType}") { // All values in two tables are the same, so column stats after join are also the same. 
val join = Join(Project(Seq(key1), table1), Project(Seq(key2), table2), Inner, - Some(EqualTo(key1, key2))) + Some(EqualTo(key1, key2)), JoinHint.NONE) val expectedStats = Statistics( sizeInBytes = 1 * (8 + 2 * getColSize(key1, columnInfo1(key1))), rowCount = Some(1), @@ -543,7 +543,8 @@ class JoinEstimationSuite extends StatsEstimationTestBase { outputList = Seq(nullColumn), rowCount = 1, attributeStats = AttributeMap(Seq(nullColumn -> nullColStat))) - val join = Join(table1, nullTable, Inner, Some(EqualTo(nameToAttr("key-1-5"), nullColumn))) + val join = Join(table1, nullTable, Inner, + Some(EqualTo(nameToAttr("key-1-5"), nullColumn)), JoinHint.NONE) val expectedStats = Statistics( sizeInBytes = 1, rowCount = Some(0), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index a664c7338bad..44cada086489 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -862,7 +862,7 @@ class Dataset[T] private[sql]( * @since 2.0.0 */ def join(right: Dataset[_]): DataFrame = withPlan { - Join(logicalPlan, right.logicalPlan, joinType = Inner, None) + Join(logicalPlan, right.logicalPlan, joinType = Inner, None, JoinHint.NONE) } /** @@ -940,7 +940,7 @@ class Dataset[T] private[sql]( // Analyze the self join. The assumption is that the analyzer will disambiguate left vs right // by creating a new instance for one of the branch. val joined = sparkSession.sessionState.executePlan( - Join(logicalPlan, right.logicalPlan, joinType = JoinType(joinType), None)) + Join(logicalPlan, right.logicalPlan, joinType = JoinType(joinType), None, JoinHint.NONE)) .analyzed.asInstanceOf[Join] withPlan { @@ -948,7 +948,8 @@ class Dataset[T] private[sql]( joined.left, joined.right, UsingJoin(JoinType(joinType), usingColumns), - None) + None, + JoinHint.NONE) } } @@ -1001,7 +1002,7 @@ class Dataset[T] private[sql]( // Trigger analysis so in the case of self-join, the analyzer will clone the plan. // After the cloning, left and right side will have distinct expression ids. val plan = withPlan( - Join(logicalPlan, right.logicalPlan, JoinType(joinType), Some(joinExprs.expr))) + Join(logicalPlan, right.logicalPlan, JoinType(joinType), Some(joinExprs.expr), JoinHint.NONE)) .queryExecution.analyzed.asInstanceOf[Join] // If auto self join alias is disabled, return the plan. 
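The hunks above and below thread a new fifth `JoinHint` argument through every `Join` construction. The definitions of `JoinHint` and `HintInfo` are not part of the hunks shown here, so the following is only a minimal sketch of their shape as inferred from the call sites in this patch (`JoinHint.NONE`, `JoinHint(Some(HintInfo(broadcast = true)), None)`, `hint.leftHint.exists(_.broadcast)`); the real Catalyst classes may carry more fields and helpers.

// Simplified stand-ins inferred from usage in this patch; not the Catalyst sources.
case class HintInfo(broadcast: Boolean = false)

case class JoinHint(leftHint: Option[HintInfo], rightHint: Option[HintInfo])

object JoinHint {
  // The default carried by joins built without any hint, e.g. from the Dataset API hunks above.
  val NONE: JoinHint = JoinHint(None, None)
}

// How the planner-side checks read a per-side hint, per the SparkStrategies hunks that follow:
def wantsBroadcast(side: Option[HintInfo]): Boolean = side.exists(_.broadcast)

Carrying the hint on the join node itself, rather than on the child plans' statistics, appears to be the point of the patch: the hint stays attached to the operator that consumes it even as the optimizer rewrites or reorders the children.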
@@ -1048,7 +1049,7 @@ class Dataset[T] private[sql]( * @since 2.1.0 */ def crossJoin(right: Dataset[_]): DataFrame = withPlan { - Join(logicalPlan, right.logicalPlan, joinType = Cross, None) + Join(logicalPlan, right.logicalPlan, joinType = Cross, None, JoinHint.NONE) } /** @@ -1083,7 +1084,8 @@ class Dataset[T] private[sql]( this.logicalPlan, other.logicalPlan, JoinType(joinType), - Some(condition.expr))).analyzed.asInstanceOf[Join] + Some(condition.expr), + JoinHint.NONE)).analyzed.asInstanceOf[Join] if (joined.joinType == LeftSemi || joined.joinType == LeftAnti) { throw new AnalysisException("Invalid join type in joinWith: " + joined.joinType.sql) @@ -1135,7 +1137,7 @@ class Dataset[T] private[sql]( implicit val tuple2Encoder: Encoder[(T, U)] = ExpressionEncoder.tuple(this.exprEnc, other.exprEnc) - withTypedPlan(Join(left, right, joined.joinType, Some(conditionExpr))) + withTypedPlan(Join(left, right, joined.joinType, Some(conditionExpr), JoinHint.NONE)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index dbc6db62bd82..b7cc373b2df1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -208,17 +208,17 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { } } - private def canBroadcastByHints(joinType: JoinType, left: LogicalPlan, right: LogicalPlan) - : Boolean = { - val buildLeft = canBuildLeft(joinType) && left.stats.hints.broadcast - val buildRight = canBuildRight(joinType) && right.stats.hints.broadcast + private def canBroadcastByHints( + joinType: JoinType, left: LogicalPlan, right: LogicalPlan, hint: JoinHint): Boolean = { + val buildLeft = canBuildLeft(joinType) && hint.leftHint.exists(_.broadcast) + val buildRight = canBuildRight(joinType) && hint.rightHint.exists(_.broadcast) buildLeft || buildRight } - private def broadcastSideByHints(joinType: JoinType, left: LogicalPlan, right: LogicalPlan) - : BuildSide = { - val buildLeft = canBuildLeft(joinType) && left.stats.hints.broadcast - val buildRight = canBuildRight(joinType) && right.stats.hints.broadcast + private def broadcastSideByHints( + joinType: JoinType, left: LogicalPlan, right: LogicalPlan, hint: JoinHint): BuildSide = { + val buildLeft = canBuildLeft(joinType) && hint.leftHint.exists(_.broadcast) + val buildRight = canBuildRight(joinType) && hint.rightHint.exists(_.broadcast) broadcastSide(buildLeft, buildRight, left, right) } @@ -241,14 +241,14 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { // --- BroadcastHashJoin -------------------------------------------------------------------- // broadcast hints were specified - case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right) - if canBroadcastByHints(joinType, left, right) => - val buildSide = broadcastSideByHints(joinType, left, right) + case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right, hint) + if canBroadcastByHints(joinType, left, right, hint) => + val buildSide = broadcastSideByHints(joinType, left, right, hint) Seq(joins.BroadcastHashJoinExec( leftKeys, rightKeys, joinType, buildSide, condition, planLater(left), planLater(right))) // broadcast hints were not specified, so need to infer it from size and configuration. 
- case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right) + case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right, _) if canBroadcastBySizes(joinType, left, right) => val buildSide = broadcastSideBySizes(joinType, left, right) Seq(joins.BroadcastHashJoinExec( @@ -256,14 +256,14 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { // --- ShuffledHashJoin --------------------------------------------------------------------- - case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right) + case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right, _) if !conf.preferSortMergeJoin && canBuildRight(joinType) && canBuildLocalHashMap(right) && muchSmaller(right, left) || !RowOrdering.isOrderable(leftKeys) => Seq(joins.ShuffledHashJoinExec( leftKeys, rightKeys, joinType, BuildRight, condition, planLater(left), planLater(right))) - case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right) + case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right, _) if !conf.preferSortMergeJoin && canBuildLeft(joinType) && canBuildLocalHashMap(left) && muchSmaller(left, right) || !RowOrdering.isOrderable(leftKeys) => @@ -272,7 +272,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { // --- SortMergeJoin ------------------------------------------------------------ - case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right) + case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right, _) if RowOrdering.isOrderable(leftKeys) => joins.SortMergeJoinExec( leftKeys, rightKeys, joinType, condition, planLater(left), planLater(right)) :: Nil @@ -280,25 +280,25 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { // --- Without joining keys ------------------------------------------------------------ // Pick BroadcastNestedLoopJoin if one side could be broadcast - case j @ logical.Join(left, right, joinType, condition) - if canBroadcastByHints(joinType, left, right) => - val buildSide = broadcastSideByHints(joinType, left, right) + case j @ logical.Join(left, right, joinType, condition, hint) + if canBroadcastByHints(joinType, left, right, hint) => + val buildSide = broadcastSideByHints(joinType, left, right, hint) joins.BroadcastNestedLoopJoinExec( planLater(left), planLater(right), buildSide, joinType, condition) :: Nil - case j @ logical.Join(left, right, joinType, condition) + case j @ logical.Join(left, right, joinType, condition, _) if canBroadcastBySizes(joinType, left, right) => val buildSide = broadcastSideBySizes(joinType, left, right) joins.BroadcastNestedLoopJoinExec( planLater(left), planLater(right), buildSide, joinType, condition) :: Nil // Pick CartesianProduct for InnerJoin - case logical.Join(left, right, _: InnerLike, condition) => + case logical.Join(left, right, _: InnerLike, condition, _) => joins.CartesianProductExec(planLater(left), planLater(right), condition) :: Nil - case logical.Join(left, right, joinType, condition) => + case logical.Join(left, right, joinType, condition, hint) => val buildSide = broadcastSide( - left.stats.hints.broadcast, right.stats.hints.broadcast, left, right) + hint.leftHint.exists(_.broadcast), hint.rightHint.exists(_.broadcast), left, right) // This join could be very slow or OOM joins.BroadcastNestedLoopJoinExec( planLater(left), planLater(right), buildSide, joinType, condition) :: Nil @@ -380,13 +380,13 @@ abstract class SparkStrategies extends 
QueryPlanner[SparkPlan] { object StreamingJoinStrategy extends Strategy { override def apply(plan: LogicalPlan): Seq[SparkPlan] = { plan match { - case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right) + case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right, _) if left.isStreaming && right.isStreaming => new StreamingSymmetricHashJoinExec( leftKeys, rightKeys, joinType, condition, planLater(left), planLater(right)) :: Nil - case Join(left, right, _, _) if left.isStreaming && right.isStreaming => + case Join(left, right, _, _, _) if left.isStreaming && right.isStreaming => throw new AnalysisException( "Stream-stream join without equality predicate is not supported", plan = Some(plan)) @@ -561,6 +561,9 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { throw new IllegalStateException( "logical except (all) operator should have been replaced by union, aggregate" + " and generate operators in the optimizer") + case logical.ResolvedHint(child, hints) => + throw new IllegalStateException( + "ResolvedHint operator should have been replaced by join hint in the optimizer") case logical.DeserializeToObject(deserializer, objAttr, child) => execution.DeserializeToObjectExec(deserializer, objAttr, planLater(child)) :: Nil @@ -632,7 +635,6 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case ExternalRDD(outputObjAttr, rdd) => ExternalRDDScanExec(outputObjAttr, rdd) :: Nil case r: LogicalRDD => RDDScanExec(r.output, r.rdd, "ExistingRDD", r.outputPartitioning, r.outputOrdering) :: Nil - case h: ResolvedHint => planLater(h.child) :: Nil case _ => Nil } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index 4109d9994dd8..41f406d6c299 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical -import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, LogicalPlan, Statistics} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Statistics} import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.storage.StorageLevel @@ -184,12 +184,7 @@ case class InMemoryRelation( override def computeStats(): Statistics = { if (cacheBuilder.sizeInBytesStats.value == 0L) { // Underlying columnar RDD hasn't been materialized, use the stats from the plan to cache. - // Note that we should drop the hint info here. We may cache a plan whose root node is a hint - // node. When we lookup the cache with a semantically same plan without hint info, the plan - // returned by cache lookup should not have hint info. If we lookup the cache with a - // semantically same plan with a different hint info, `CacheManager.useCachedData` will take - // care of it and retain the hint info in the lookup input plan. 
- statsOfPlanToCache.copy(hints = HintInfo()) + statsOfPlanToCache } else { Statistics(sizeInBytes = cacheBuilder.sizeInBytesStats.value.longValue) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 6e805c4f3c39..2141be4d680f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -27,9 +27,11 @@ import org.apache.spark.executor.DataReadMethod.DataReadMethod import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical.Join import org.apache.spark.sql.execution.{RDDScanExec, SparkPlan} import org.apache.spark.sql.execution.columnar._ import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils} import org.apache.spark.storage.{RDDBlockId, StorageLevel} @@ -925,4 +927,23 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext } } } + + test("Cache should respect the broadcast hint") { + val df = broadcast(spark.range(1000)).cache() + val df2 = spark.range(1000).cache() + df.count() + df2.count() + + // Test the broadcast hint. + val joinPlan = df.join(df2, "id").queryExecution.optimizedPlan + val hint = joinPlan.collect { + case Join(_, _, _, _, hint) => hint + } + assert(hint.size == 1) + assert(hint(0).leftHint.get.broadcast) + assert(hint(0).rightHint.isEmpty) + + // Clean-up + df.unpersist() + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index c9f41ab1c017..a4a3e2a62d1a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -198,7 +198,7 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext { // outer -> left val outerJoin2Left = df.join(df2, $"a.int" === $"b.int", "outer").where($"a.int" >= 3) assert(outerJoin2Left.queryExecution.optimizedPlan.collect { - case j @ Join(_, _, LeftOuter, _) => j }.size === 1) + case j @ Join(_, _, LeftOuter, _, _) => j }.size === 1) checkAnswer( outerJoin2Left, Row(3, 4, "3", null, null, null) :: Nil) @@ -206,7 +206,7 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext { // outer -> right val outerJoin2Right = df.join(df2, $"a.int" === $"b.int", "outer").where($"b.int" >= 3) assert(outerJoin2Right.queryExecution.optimizedPlan.collect { - case j @ Join(_, _, RightOuter, _) => j }.size === 1) + case j @ Join(_, _, RightOuter, _, _) => j }.size === 1) checkAnswer( outerJoin2Right, Row(null, null, null, 5, 6, "5") :: Nil) @@ -215,7 +215,7 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext { val outerJoin2Inner = df.join(df2, $"a.int" === $"b.int", "outer"). 
where($"a.int" === 1 && $"b.int2" === 3) assert(outerJoin2Inner.queryExecution.optimizedPlan.collect { - case j @ Join(_, _, Inner, _) => j }.size === 1) + case j @ Join(_, _, Inner, _, _) => j }.size === 1) checkAnswer( outerJoin2Inner, Row(1, 2, "1", 1, 3, "1") :: Nil) @@ -223,7 +223,7 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext { // right -> inner val rightJoin2Inner = df.join(df2, $"a.int" === $"b.int", "right").where($"a.int" > 0) assert(rightJoin2Inner.queryExecution.optimizedPlan.collect { - case j @ Join(_, _, Inner, _) => j }.size === 1) + case j @ Join(_, _, Inner, _, _) => j }.size === 1) checkAnswer( rightJoin2Inner, Row(1, 2, "1", 1, 3, "1") :: Nil) @@ -231,7 +231,7 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext { // left -> inner val leftJoin2Inner = df.join(df2, $"a.int" === $"b.int", "left").where($"b.int2" > 0) assert(leftJoin2Inner.queryExecution.optimizedPlan.collect { - case j @ Join(_, _, Inner, _) => j }.size === 1) + case j @ Join(_, _, Inner, _, _) => j }.size === 1) checkAnswer( leftJoin2Inner, Row(1, 2, "1", 1, 3, "1") :: Nil) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala new file mode 100644 index 000000000000..3652895ff43d --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.test.SharedSQLContext + +class JoinHintSuite extends PlanTest with SharedSQLContext { + import testImplicits._ + + lazy val df = spark.range(10) + lazy val df1 = df.selectExpr("id as a1", "id as a2") + lazy val df2 = df.selectExpr("id as b1", "id as b2") + lazy val df3 = df.selectExpr("id as c1", "id as c2") + + def verifyJoinHint(df: DataFrame, expectedHints: Seq[JoinHint]): Unit = { + val optimized = df.queryExecution.optimizedPlan + val joinHints = optimized collect { + case Join(_, _, _, _, hint) => hint + case _: ResolvedHint => fail("ResolvedHint should not appear after optimize.") + } + assert(joinHints == expectedHints) + } + + test("single join") { + verifyJoinHint( + df.hint("broadcast").join(df, "id"), + JoinHint( + Some(HintInfo(broadcast = true)), + None) :: Nil + ) + verifyJoinHint( + df.join(df.hint("broadcast"), "id"), + JoinHint( + None, + Some(HintInfo(broadcast = true))) :: Nil + ) + } + + test("multiple joins") { + verifyJoinHint( + df1.join(df2.hint("broadcast").join(df3, 'b1 === 'c1).hint("broadcast"), 'a1 === 'c1), + JoinHint( + None, + Some(HintInfo(broadcast = true))) :: + JoinHint( + Some(HintInfo(broadcast = true)), + None) :: Nil + ) + verifyJoinHint( + df1.hint("broadcast").join(df2, 'a1 === 'b1).hint("broadcast").join(df3, 'a1 === 'c1), + JoinHint( + Some(HintInfo(broadcast = true)), + None) :: + JoinHint( + Some(HintInfo(broadcast = true)), + None) :: Nil + ) + } + + test("hint scope") { + withTempView("a", "b", "c") { + df1.createOrReplaceTempView("a") + df2.createOrReplaceTempView("b") + verifyJoinHint( + sql( + """ + |select /*+ broadcast(a, b)*/ * from ( + | select /*+ broadcast(b)*/ * from a join b on a.a1 = b.b1 + |) a join ( + | select /*+ broadcast(a)*/ * from a join b on a.a1 = b.b1 + |) b on a.a1 = b.b1 + """.stripMargin), + JoinHint( + Some(HintInfo(broadcast = true)), + Some(HintInfo(broadcast = true))) :: + JoinHint( + None, + Some(HintInfo(broadcast = true))) :: + JoinHint( + Some(HintInfo(broadcast = true)), + None) :: Nil + ) + } + } + + test("hint preserved after join reorder") { + withTempView("a", "b", "c") { + df1.createOrReplaceTempView("a") + df2.createOrReplaceTempView("b") + df3.createOrReplaceTempView("c") + verifyJoinHint( + sql("select /*+ broadcast(a, c)*/ * from a, b, c " + + "where a.a1 = b.b1 and b.b1 = c.c1"), + JoinHint( + None, + Some(HintInfo(broadcast = true))) :: + JoinHint( + Some(HintInfo(broadcast = true)), + None):: Nil + ) + verifyJoinHint( + sql("select /*+ broadcast(a, c)*/ * from a, c, b " + + "where a.a1 = b.b1 and b.b1 = c.c1"), + JoinHint( + None, + Some(HintInfo(broadcast = true))) :: + JoinHint( + Some(HintInfo(broadcast = true)), + None):: Nil + ) + verifyJoinHint( + sql("select /*+ broadcast(b, c)*/ * from a, c, b " + + "where a.a1 = b.b1 and b.b1 = c.c1"), + JoinHint( + None, + Some(HintInfo(broadcast = true))) :: + JoinHint( + None, + Some(HintInfo(broadcast = true))):: Nil + ) + + verifyJoinHint( + df1.join(df2, 'a1 === 'b1 && 'a1 > 5).hint("broadcast") + .join(df3, 'b1 === 'c1 && 'a1 < 10), + JoinHint( + Some(HintInfo(broadcast = true)), + None) :: + JoinHint.NONE:: Nil + ) + + verifyJoinHint( + df1.join(df2, 'a1 === 'b1 && 'a1 > 5).hint("broadcast") + .join(df3, 'b1 === 'c1 && 'a1 < 10) + .join(df, 'b1 === 'id), + JoinHint.NONE :: + JoinHint( + Some(HintInfo(broadcast = true)), + None) :: + JoinHint.NONE:: Nil + ) + } + } + + 
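The `verifyJoinHint` helper above fails if any `ResolvedHint` survives optimization: hints placed on a join's inputs are expected to be folded into the join itself. As a rough illustration of that behaviour only (not the actual `EliminateResolvedHint` rule, which also has to propagate hints through other operators, as the "hint merge" test below exercises), here is a toy tree rewrite over simplified plan nodes, reusing the `JoinHint`/`HintInfo` stand-ins sketched earlier:

// Toy model of "pull hints sitting on join children into the join, drop the hint nodes".
sealed trait Plan
case class Relation(name: String) extends Plan
case class Hinted(child: Plan, info: HintInfo) extends Plan            // stands in for ResolvedHint
case class JoinNode(left: Plan, right: Plan, hint: JoinHint) extends Plan

def stripHint(plan: Plan): (Plan, Option[HintInfo]) = plan match {
  case Hinted(child, info) => (child, Some(info))
  case other => (other, None)
}

def pullUpHints(plan: Plan): Plan = plan match {
  case JoinNode(left, right, _) =>
    val (l, lh) = stripHint(left)
    val (r, rh) = stripHint(right)
    JoinNode(pullUpHints(l), pullUpHints(r), JoinHint(lh, rh))
  case Hinted(child, _) => pullUpHints(child)  // a hint with no join directly above it is dropped here
  case leaf => leaf
}

// pullUpHints(JoinNode(Hinted(Relation("a"), HintInfo(broadcast = true)), Relation("b"), JoinHint.NONE))
// == JoinNode(Relation("a"), Relation("b"), JoinHint(Some(HintInfo(broadcast = true)), None))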
test("intersect/except") { + val dfSub = spark.range(2) + verifyJoinHint( + df.hint("broadcast").except(dfSub).join(df, "id"), + JoinHint( + Some(HintInfo(broadcast = true)), + None) :: + JoinHint.NONE :: Nil + ) + verifyJoinHint( + df.join(df.hint("broadcast").intersect(dfSub), "id"), + JoinHint( + None, + Some(HintInfo(broadcast = true))) :: + JoinHint.NONE :: Nil + ) + } + + test("hint merge") { + verifyJoinHint( + df.hint("broadcast").filter('id > 2).hint("broadcast").join(df, "id"), + JoinHint( + Some(HintInfo(broadcast = true)), + None) :: Nil + ) + verifyJoinHint( + df.join(df.hint("broadcast").limit(2).hint("broadcast"), "id"), + JoinHint( + None, + Some(HintInfo(broadcast = true))) :: Nil + ) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 02dc32d5f90b..99842680cedf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -237,8 +237,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared ) numbers.foreach { case (input, (expectedSize, expectedRows)) => val stats = Statistics(sizeInBytes = input, rowCount = Some(input)) - val expectedString = s"sizeInBytes=$expectedSize, rowCount=$expectedRows," + - s" hints=none" + val expectedString = s"sizeInBytes=$expectedSize, rowCount=$expectedRows" assert(stats.simpleString == expectedString) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index 42dd0024b258..f238148e61c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -203,7 +203,7 @@ class BroadcastJoinSuite extends QueryTest with SQLTestUtils { } test("broadcast hint in SQL") { - import org.apache.spark.sql.catalyst.plans.logical.{ResolvedHint, Join} + import org.apache.spark.sql.catalyst.plans.logical.Join spark.range(10).createOrReplaceTempView("t") spark.range(10).createOrReplaceTempView("u") @@ -216,12 +216,12 @@ class BroadcastJoinSuite extends QueryTest with SQLTestUtils { val plan3 = sql(s"SELECT /*+ $name(v) */ * FROM t JOIN u ON t.id = u.id").queryExecution .optimizedPlan - assert(plan1.asInstanceOf[Join].left.isInstanceOf[ResolvedHint]) - assert(!plan1.asInstanceOf[Join].right.isInstanceOf[ResolvedHint]) - assert(!plan2.asInstanceOf[Join].left.isInstanceOf[ResolvedHint]) - assert(plan2.asInstanceOf[Join].right.isInstanceOf[ResolvedHint]) - assert(!plan3.asInstanceOf[Join].left.isInstanceOf[ResolvedHint]) - assert(!plan3.asInstanceOf[Join].right.isInstanceOf[ResolvedHint]) + assert(plan1.asInstanceOf[Join].hint.leftHint.get.broadcast) + assert(plan1.asInstanceOf[Join].hint.rightHint.isEmpty) + assert(plan2.asInstanceOf[Join].hint.leftHint.isEmpty) + assert(plan2.asInstanceOf[Join].hint.rightHint.get.broadcast) + assert(plan3.asInstanceOf[Join].hint.leftHint.isEmpty) + assert(plan3.asInstanceOf[Join].hint.rightHint.isEmpty) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala index 22279a3a43ef..771a9730247a 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical.Join +import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint} import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SparkPlan, SparkPlanTest} import org.apache.spark.sql.execution.exchange.EnsureRequirements import org.apache.spark.sql.internal.SQLConf @@ -85,7 +85,8 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSQLContext { expectedAnswer: Seq[Row]): Unit = { def extractJoinParts(): Option[ExtractEquiJoinKeys.ReturnType] = { - val join = Join(leftRows.logicalPlan, rightRows.logicalPlan, Inner, Some(condition)) + val join = Join(leftRows.logicalPlan, rightRows.logicalPlan, + Inner, Some(condition), JoinHint.NONE) ExtractEquiJoinKeys.unapply(join) } @@ -102,7 +103,7 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSQLContext { } test(s"$testName using ShuffledHashJoin") { - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => EnsureRequirements(left.sqlContext.sessionState.conf).apply( @@ -121,7 +122,7 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSQLContext { } testWithWholeStageCodegenOnAndOff(s"$testName using BroadcastHashJoin") { _ => - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => EnsureRequirements(left.sqlContext.sessionState.conf).apply( @@ -140,7 +141,7 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSQLContext { } test(s"$testName using SortMergeJoin") { - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => EnsureRequirements(left.sqlContext.sessionState.conf).apply( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala index f5edd6bbd5e6..f99a278bb242 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys import org.apache.spark.sql.catalyst.plans.Inner -import org.apache.spark.sql.catalyst.plans.logical.Join +import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint} import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.exchange.EnsureRequirements import 
org.apache.spark.sql.internal.SQLConf @@ -80,7 +80,8 @@ class InnerJoinSuite extends SparkPlanTest with SharedSQLContext { expectedAnswer: Seq[Product]): Unit = { def extractJoinParts(): Option[ExtractEquiJoinKeys.ReturnType] = { - val join = Join(leftRows.logicalPlan, rightRows.logicalPlan, Inner, Some(condition())) + val join = Join(leftRows.logicalPlan, rightRows.logicalPlan, + Inner, Some(condition()), JoinHint.NONE) ExtractEquiJoinKeys.unapply(join) } @@ -128,7 +129,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSQLContext { } testWithWholeStageCodegenOnAndOff(s"$testName using BroadcastHashJoin (build=left)") { _ => - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) => makeBroadcastHashJoin( @@ -140,7 +141,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSQLContext { } testWithWholeStageCodegenOnAndOff(s"$testName using BroadcastHashJoin (build=right)") { _ => - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) => makeBroadcastHashJoin( @@ -152,7 +153,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSQLContext { } test(s"$testName using ShuffledHashJoin (build=left)") { - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) => makeShuffledHashJoin( @@ -164,7 +165,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSQLContext { } test(s"$testName using ShuffledHashJoin (build=right)") { - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) => makeShuffledHashJoin( @@ -176,7 +177,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSQLContext { } testWithWholeStageCodegenOnAndOff(s"$testName using SortMergeJoin") { _ => - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (leftPlan: SparkPlan, rightPlan: SparkPlan) => makeSortMergeJoin(leftKeys, rightKeys, boundCondition, leftPlan, rightPlan), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala index 513248dae48b..1f04fcf6ca45 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.expressions.{And, Expression, LessThan} import 
org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical.Join +import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint} import org.apache.spark.sql.execution.{SparkPlan, SparkPlanTest} import org.apache.spark.sql.execution.exchange.EnsureRequirements import org.apache.spark.sql.internal.SQLConf @@ -72,13 +72,14 @@ class OuterJoinSuite extends SparkPlanTest with SharedSQLContext { expectedAnswer: Seq[Product]): Unit = { def extractJoinParts(): Option[ExtractEquiJoinKeys.ReturnType] = { - val join = Join(leftRows.logicalPlan, rightRows.logicalPlan, Inner, Some(condition)) + val join = Join(leftRows.logicalPlan, rightRows.logicalPlan, + Inner, Some(condition), JoinHint.NONE) ExtractEquiJoinKeys.unapply(join) } if (joinType != FullOuter) { test(s"$testName using ShuffledHashJoin") { - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { val buildSide = if (joinType == LeftOuter) BuildRight else BuildLeft checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => @@ -99,7 +100,7 @@ class OuterJoinSuite extends SparkPlanTest with SharedSQLContext { case RightOuter => BuildLeft case _ => fail(s"Unsupported join type $joinType") } - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => BroadcastHashJoinExec( @@ -112,7 +113,7 @@ class OuterJoinSuite extends SparkPlanTest with SharedSQLContext { } test(s"$testName using SortMergeJoin") { - extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _) => + extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => EnsureRequirements(spark.sessionState.conf).apply( From 1a641525e60039cc6b10816e946cb6f44b3e2696 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Mon, 7 Jan 2019 15:35:33 -0800 Subject: [PATCH 0248/1072] [SPARK-26491][CORE][TEST] Use ConfigEntry for hardcoded configs for test categories ## What changes were proposed in this pull request? The PR makes hardcoded `spark.test` and `spark.testing` configs to use `ConfigEntry` and put them in the config package. ## How was this patch tested? existing UTs Closes #23413 from mgaido91/SPARK-26491. 
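For context, a rough sketch of the shape the new `Tests` config object (created as `core/src/main/scala/org/apache/spark/internal/config/Tests.scala` in the diffstat below) might take, based only on the call sites visible in this patch (`conf.contains(IS_TESTING)`, and `conf.get(TEST_SCHEDULE_INTERVAL)` replacing a `getLong(..., 100)`). The exact builder calls, the full set of entries, and the defaults are assumptions, not a copy of the real file:

// Hypothetical sketch only; the builder DSL is assumed to match the rest of
// org.apache.spark.internal.config, and entries beyond these two are omitted.
package org.apache.spark.internal.config

private[spark] object Tests {

  // Replaces hardcoded lookups of the "spark.testing" key.
  val IS_TESTING = ConfigBuilder("spark.testing")
    .booleanConf
    .createOptional

  // Replaces conf.getLong("spark.testing.dynamicAllocation.scheduleInterval", 100).
  val TEST_SCHEDULE_INTERVAL = ConfigBuilder("spark.testing.dynamicAllocation.scheduleInterval")
    .longConf
    .createWithDefault(100)
}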
Authored-by: Marco Gaido Signed-off-by: Marcelo Vanzin --- .../spark/ExecutorAllocationManager.scala | 4 +- .../scala/org/apache/spark/SparkContext.scala | 3 +- .../deploy/history/FsHistoryProvider.scala | 3 +- .../apache/spark/deploy/worker/Worker.scala | 4 +- .../spark/executor/ProcfsMetricsGetter.scala | 2 +- .../apache/spark/executor/TaskMetrics.scala | 3 +- .../apache/spark/internal/config/Tests.scala | 56 +++++++++++++++++++ .../spark/memory/StaticMemoryManager.scala | 5 +- .../spark/memory/UnifiedMemoryManager.scala | 7 ++- .../apache/spark/scheduler/DAGScheduler.scala | 3 +- .../cluster/StandaloneSchedulerBackend.scala | 3 +- .../org/apache/spark/util/SizeEstimator.scala | 5 +- .../scala/org/apache/spark/util/Utils.scala | 5 +- .../org/apache/spark/DistributedSuite.scala | 5 +- .../ExecutorAllocationManagerSuite.scala | 3 +- .../scala/org/apache/spark/ShuffleSuite.scala | 5 +- .../org/apache/spark/SparkFunSuite.scala | 3 +- .../history/HistoryServerArgumentsSuite.scala | 6 +- .../deploy/history/HistoryServerSuite.scala | 7 ++- .../memory/StaticMemoryManagerSuite.scala | 5 +- .../memory/UnifiedMemoryManagerSuite.scala | 33 +++++------ .../scheduler/BarrierTaskContextSuite.scala | 7 ++- .../scheduler/BlacklistIntegrationSuite.scala | 19 ++++--- .../sort/ShuffleExternalSorterSuite.scala | 5 +- .../BlockManagerReplicationSuite.scala | 9 +-- .../spark/storage/BlockManagerSuite.scala | 10 ++-- .../spark/storage/MemoryStoreSuite.scala | 1 - .../spark/util/SizeEstimatorSuite.scala | 5 +- .../ExternalAppendOnlyMapSuite.scala | 3 +- .../util/collection/ExternalSorterSuite.scala | 3 +- .../KubernetesTestComponents.scala | 3 +- .../MesosCoarseGrainedSchedulerBackend.scala | 3 +- .../spark/sql/execution/SQLExecution.scala | 4 +- .../apache/spark/sql/BenchmarkQueryTest.scala | 3 +- .../execution/UnsafeRowSerializerSuite.scala | 3 +- 35 files changed, 165 insertions(+), 83 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/internal/config/Tests.scala diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index d966582295b3..0807e653b41a 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -27,6 +27,7 @@ import com.codahale.metrics.{Gauge, MetricRegistry} import org.apache.spark.internal.{config, Logging} import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests.TEST_SCHEDULE_INTERVAL import org.apache.spark.metrics.source.Source import org.apache.spark.scheduler._ import org.apache.spark.storage.BlockManagerMaster @@ -157,7 +158,7 @@ private[spark] class ExecutorAllocationManager( // Polling loop interval (ms) private val intervalMillis: Long = if (Utils.isTesting) { - conf.getLong(TESTING_SCHEDULE_INTERVAL_KEY, 100) + conf.get(TEST_SCHEDULE_INTERVAL) } else { 100 } @@ -899,5 +900,4 @@ private[spark] class ExecutorAllocationManager( private object ExecutorAllocationManager { val NOT_SET = Long.MaxValue - val TESTING_SCHEDULE_INTERVAL_KEY = "spark.testing.dynamicAllocation.scheduleInterval" } diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 89be9de08307..3a1e1b931002 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -45,6 +45,7 @@ import org.apache.spark.deploy.{LocalSparkCluster, 
SparkHadoopUtil} import org.apache.spark.input.{FixedLengthBinaryInputFormat, PortableDataStream, StreamInputFormat, WholeTextFileInputFormat} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests._ import org.apache.spark.io.CompressionCodec import org.apache.spark.partial.{ApproximateEvaluator, PartialResult} import org.apache.spark.rdd._ @@ -470,7 +471,7 @@ class SparkContext(config: SparkConf) extends Logging { // Convert java options to env vars as a work around // since we can't set env vars directly in sbt. - for { (envKey, propKey) <- Seq(("SPARK_TESTING", "spark.testing")) + for { (envKey, propKey) <- Seq(("SPARK_TESTING", IS_TESTING.key)) value <- Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey)))} { executorEnvs(envKey) = value } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 709a380dfb63..3c5648434fa6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -45,6 +45,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{DRIVER_LOG_DFS_DIR, History} import org.apache.spark.internal.config.History._ import org.apache.spark.internal.config.Status._ +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.io.CompressionCodec import org.apache.spark.scheduler._ import org.apache.spark.scheduler.ReplayListenerBus._ @@ -267,7 +268,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } // Disable the background thread during tests. - if (!conf.contains("spark.testing")) { + if (!conf.contains(IS_TESTING)) { // A task that periodically checks for event log updates on disk. 
logDebug(s"Scheduling update thread every $UPDATE_INTERVAL_S seconds") pool.scheduleWithFixedDelay( diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index d5ea2523c628..467df26c4735 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -37,6 +37,7 @@ import org.apache.spark.deploy.ExternalShuffleService import org.apache.spark.deploy.master.{DriverState, Master} import org.apache.spark.deploy.worker.ui.WorkerWebUI import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.metrics.MetricsSystem import org.apache.spark.rpc._ import org.apache.spark.util.{SparkUncaughtExceptionHandler, ThreadUtils, Utils} @@ -103,7 +104,6 @@ private[deploy] class Worker( private val CLEANUP_NON_SHUFFLE_FILES_ENABLED = conf.getBoolean("spark.storage.cleanupFilesAfterExecutorExit", true) - private val testing: Boolean = sys.props.contains("spark.testing") private var master: Option[RpcEndpointRef] = None /** @@ -127,7 +127,7 @@ private[deploy] class Worker( private var connected = false private val workerId = generateWorkerId() private val sparkHome = - if (testing) { + if (sys.props.contains(IS_TESTING.key)) { assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!") new File(sys.props("spark.test.home")) } else { diff --git a/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala b/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala index af67f41e94af..f354d603c2e3 100644 --- a/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala +++ b/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala @@ -43,7 +43,7 @@ private[spark] case class ProcfsMetrics( // project. 
 private[spark] class ProcfsMetricsGetter(procfsDir: String = "/proc/") extends Logging {
   private val procfsStatFile = "stat"
-  private val testing = sys.env.contains("SPARK_TESTING") || sys.props.contains("spark.testing")
+  private val testing = Utils.isTesting
   private val pageSize = computePageSize()
   private var isAvailable: Boolean = isProcfsAvailable
   private val pid = computePid()
diff --git a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala
index 85b2745a2aec..ea79c7310349 100644
--- a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala
+++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala
@@ -23,6 +23,7 @@ import scala.collection.mutable.{ArrayBuffer, LinkedHashMap}
 import org.apache.spark._
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config.Tests.IS_TESTING
 import org.apache.spark.scheduler.AccumulableInfo
 import org.apache.spark.storage.{BlockId, BlockStatus}
 import org.apache.spark.util._
@@ -202,7 +203,7 @@ class TaskMetrics private[spark] () extends Serializable {
   }

   // Only used for test
-  private[spark] val testAccum = sys.props.get("spark.testing").map(_ => new LongAccumulator)
+  private[spark] val testAccum = sys.props.get(IS_TESTING.key).map(_ => new LongAccumulator)

   import InternalAccumulator._
diff --git a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala
new file mode 100644
index 000000000000..21660ab3a951
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.internal.config
+
+private[spark] object Tests {
+
+  val TEST_USE_COMPRESSED_OOPS_KEY = "spark.test.useCompressedOops"
+
+  val TEST_MEMORY = ConfigBuilder("spark.testing.memory")
+    .longConf
+    .createWithDefault(Runtime.getRuntime.maxMemory)
+
+  val TEST_SCHEDULE_INTERVAL =
+    ConfigBuilder("spark.testing.dynamicAllocation.scheduleInterval")
+      .longConf
+      .createWithDefault(100)
+
+  val IS_TESTING = ConfigBuilder("spark.testing")
+    .booleanConf
+    .createOptional
+
+  val TEST_NO_STAGE_RETRY = ConfigBuilder("spark.test.noStageRetry")
+    .booleanConf
+    .createWithDefault(false)
+
+  val TEST_RESERVED_MEMORY = ConfigBuilder("spark.testing.reservedMemory")
+    .longConf
+    .createOptional
+
+  val TEST_N_HOSTS = ConfigBuilder("spark.testing.nHosts")
+    .intConf
+    .createWithDefault(5)
+
+  val TEST_N_EXECUTORS_HOST = ConfigBuilder("spark.testing.nExecutorsPerHost")
+    .intConf
+    .createWithDefault(4)
+
+  val TEST_N_CORES_EXECUTOR = ConfigBuilder("spark.testing.nCoresPerExecutor")
+    .intConf
+    .createWithDefault(2)
+}
diff --git a/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala
index 0fd349dc5161..7e052c02c937 100644
--- a/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala
+++ b/core/src/main/scala/org/apache/spark/memory/StaticMemoryManager.scala
@@ -19,6 +19,7 @@ package org.apache.spark.memory

 import org.apache.spark.SparkConf
 import org.apache.spark.internal.config
+import org.apache.spark.internal.config.Tests.TEST_MEMORY
 import org.apache.spark.storage.BlockId

 /**
@@ -120,7 +121,7 @@ private[spark] object StaticMemoryManager {
    * Return the total amount of memory available for the storage region, in bytes.
    */
   private def getMaxStorageMemory(conf: SparkConf): Long = {
-    val systemMaxMemory = conf.getLong("spark.testing.memory", Runtime.getRuntime.maxMemory)
+    val systemMaxMemory = conf.get(TEST_MEMORY)
     val memoryFraction = conf.getDouble("spark.storage.memoryFraction", 0.6)
     val safetyFraction = conf.getDouble("spark.storage.safetyFraction", 0.9)
     (systemMaxMemory * memoryFraction * safetyFraction).toLong
@@ -130,7 +131,7 @@ private[spark] object StaticMemoryManager {
    * Return the total amount of memory available for the execution region, in bytes.
    */
   private def getMaxExecutionMemory(conf: SparkConf): Long = {
-    val systemMaxMemory = conf.getLong("spark.testing.memory", Runtime.getRuntime.maxMemory)
+    val systemMaxMemory = conf.get(TEST_MEMORY)

     if (systemMaxMemory < MIN_MEMORY_BYTES) {
       throw new IllegalArgumentException(s"System memory $systemMaxMemory must " +
diff --git a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
index 9260fd3a6fb3..7801bb87050f 100644
--- a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
+++ b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
@@ -19,6 +19,7 @@ package org.apache.spark.memory

 import org.apache.spark.SparkConf
 import org.apache.spark.internal.config
+import org.apache.spark.internal.config.Tests._
 import org.apache.spark.storage.BlockId

 /**
@@ -210,9 +211,9 @@ object UnifiedMemoryManager {
   * Return the total amount of memory shared between execution and storage, in bytes.
*/ private def getMaxMemory(conf: SparkConf): Long = { - val systemMemory = conf.getLong("spark.testing.memory", Runtime.getRuntime.maxMemory) - val reservedMemory = conf.getLong("spark.testing.reservedMemory", - if (conf.contains("spark.testing")) 0 else RESERVED_SYSTEM_MEMORY_BYTES) + val systemMemory = conf.get(TEST_MEMORY) + val reservedMemory = conf.getLong(TEST_RESERVED_MEMORY.key, + if (conf.contains(IS_TESTING)) 0 else RESERVED_SYSTEM_MEMORY_BYTES) val minSystemMemory = (reservedMemory * 1.5).ceil.toLong if (systemMemory < minSystemMemory) { throw new IllegalArgumentException(s"System memory $systemMemory must " + diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 6f4c326442e1..f6ade180ee25 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -38,6 +38,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} import org.apache.spark.internal.Logging import org.apache.spark.internal.config +import org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY import org.apache.spark.network.util.JavaUtils import org.apache.spark.partial.{ApproximateActionListener, ApproximateEvaluator, PartialResult} import org.apache.spark.rdd.{DeterministicLevel, RDD, RDDCheckpointData} @@ -186,7 +187,7 @@ private[spark] class DAGScheduler( private val closureSerializer = SparkEnv.get.closureSerializer.newInstance() /** If enabled, FetchFailed will not cause stage retry, in order to surface the problem. */ - private val disallowStageRetryForTest = sc.getConf.getBoolean("spark.test.noStageRetry", false) + private val disallowStageRetryForTest = sc.getConf.get(TEST_NO_STAGE_RETRY) /** * Whether to unregister all the outputs on the host in condition that we receive a FetchFailure, diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index adef20d3077d..66080b6e6b4f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -26,6 +26,7 @@ import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.deploy.{ApplicationDescription, Command} import org.apache.spark.deploy.client.{StandaloneAppClient, StandaloneAppClientListener} import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler._ @@ -90,7 +91,7 @@ private[spark] class StandaloneSchedulerBackend( // compute-classpath.{cmd,sh} and makes all needed jars available to child processes // when the assembly is built with the "*-provided" profiles enabled. 
val testingClassPath = - if (sys.props.contains("spark.testing")) { + if (sys.props.contains(IS_TESTING.key)) { sys.props("java.class.path").split(java.io.File.pathSeparator).toSeq } else { Nil diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index 3bfdf95db84c..e12b6b71578c 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -28,6 +28,7 @@ import com.google.common.collect.MapMaker import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.Tests.TEST_USE_COMPRESSED_OOPS_KEY import org.apache.spark.util.collection.OpenHashSet /** @@ -126,8 +127,8 @@ object SizeEstimator extends Logging { private def getIsCompressedOops: Boolean = { // This is only used by tests to override the detection of compressed oops. The test // actually uses a system property instead of a SparkConf, so we'll stick with that. - if (System.getProperty("spark.test.useCompressedOops") != null) { - return System.getProperty("spark.test.useCompressedOops").toBoolean + if (System.getProperty(TEST_USE_COMPRESSED_OOPS_KEY) != null) { + return System.getProperty(TEST_USE_COMPRESSED_OOPS_KEY).toBoolean } // java.vm.info provides compressed ref info for IBM JDKs diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 3527fee68939..16ef38142ad9 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -60,6 +60,7 @@ import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{config, Logging} import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.launcher.SparkLauncher import org.apache.spark.network.util.JavaUtils import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance} @@ -1847,7 +1848,7 @@ private[spark] object Utils extends Logging { * Indicates whether Spark is currently running unit tests. */ def isTesting: Boolean = { - sys.env.contains("SPARK_TESTING") || sys.props.contains("spark.testing") + sys.env.contains("SPARK_TESTING") || sys.props.contains(IS_TESTING.key) } /** @@ -2175,7 +2176,7 @@ private[spark] object Utils extends Logging { */ def portMaxRetries(conf: SparkConf): Int = { val maxRetries = conf.getOption("spark.port.maxRetries").map(_.toInt) - if (conf.contains("spark.testing")) { + if (conf.contains(IS_TESTING)) { // Set a higher number of retries for tests... 
maxRetries.getOrElse(100) } else { diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala index 4083b20c2359..21050e44414f 100644 --- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala @@ -22,6 +22,7 @@ import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} import org.scalatest.time.{Millis, Span} import org.apache.spark.internal.config +import org.apache.spark.internal.config.Tests._ import org.apache.spark.security.EncryptionFunSuite import org.apache.spark.storage.{RDDBlockId, StorageLevel} import org.apache.spark.util.io.ChunkedByteBuffer @@ -217,7 +218,7 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex val size = 10000 val conf = new SparkConf() .set("spark.storage.unrollMemoryThreshold", "1024") - .set("spark.testing.memory", (size / 2).toString) + .set(TEST_MEMORY, size.toLong / 2) sc = new SparkContext(clusterUrl, "test", conf) val data = sc.parallelize(1 to size, 2).persist(StorageLevel.MEMORY_ONLY) assert(data.count() === size) @@ -233,7 +234,7 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex val numPartitions = 20 val conf = new SparkConf() .set("spark.storage.unrollMemoryThreshold", "1024") - .set("spark.testing.memory", size.toString) + .set(TEST_MEMORY, size.toLong) sc = new SparkContext(clusterUrl, "test", conf) val data = sc.parallelize(1 to size, numPartitions).persist(StorageLevel.MEMORY_ONLY) assert(data.count() === size) diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 38f5e8c9f0ac..6b310b9cb67a 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -25,6 +25,7 @@ import org.scalatest.{BeforeAndAfter, PrivateMethodTester} import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.config +import org.apache.spark.internal.config.Tests.TEST_SCHEDULE_INTERVAL import org.apache.spark.scheduler._ import org.apache.spark.scheduler.ExternalClusterManager import org.apache.spark.scheduler.cluster.ExecutorInfo @@ -1166,7 +1167,7 @@ class ExecutorAllocationManagerSuite .set("spark.dynamicAllocation.testing", "true") // SPARK-22864: effectively disable the allocation schedule by setting the period to a // really long value. 
- .set(TESTING_SCHEDULE_INTERVAL_KEY, "10000") + .set(TEST_SCHEDULE_INTERVAL, 10000L) val sc = new SparkContext(conf) contexts += sc sc diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index 35f728cd57fe..ffa70425ea36 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -23,6 +23,7 @@ import java.util.concurrent.{Callable, CyclicBarrier, Executors, ExecutorService import org.scalatest.Matchers import org.apache.spark.ShuffleSuite.NonJavaSerializableClass +import org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.rdd.{CoGroupedRDD, OrderedRDDFunctions, RDD, ShuffledRDD, SubtractedRDD} import org.apache.spark.scheduler.{MapStatus, MyRDD, SparkListener, SparkListenerTaskEnd} @@ -37,7 +38,7 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC // Ensure that the DAGScheduler doesn't retry stages whose fetches fail, so that we accurately // test that the shuffle works (rather than retrying until all blocks are local to one Executor). - conf.set("spark.test.noStageRetry", "true") + conf.set(TEST_NO_STAGE_RETRY, true) test("groupByKey without compression") { val myConf = conf.clone().set("spark.shuffle.compress", "false") @@ -269,7 +270,7 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC } test("[SPARK-4085] rerun map stage if reduce stage cannot find its local shuffle file") { - val myConf = conf.clone().set("spark.test.noStageRetry", "false") + val myConf = conf.clone().set(TEST_NO_STAGE_RETRY, false) sc = new SparkContext("local", "test", myConf) val rdd = sc.parallelize(1 to 10, 2).map((_, 1)).reduceByKey(_ + _) rdd.count() diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala index dad24d7c01b8..7d114b1b0c14 100644 --- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala @@ -23,6 +23,7 @@ import java.io.File import org.scalatest.{BeforeAndAfterAll, FunSuite, Outcome} import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.util.{AccumulatorContext, Utils} /** @@ -59,7 +60,7 @@ abstract class SparkFunSuite protected val enableAutoThreadAudit = true protected override def beforeAll(): Unit = { - System.setProperty("spark.testing", "true") + System.setProperty(IS_TESTING.key, "true") if (enableAutoThreadAudit) { doThreadPreAudit() } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala index 6b479873f69f..5903ae71ec66 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala @@ -23,7 +23,7 @@ import com.google.common.io.Files import org.apache.spark._ import org.apache.spark.internal.config.History._ -import org.apache.spark.util.Utils +import org.apache.spark.internal.config.Tests._ class HistoryServerArgumentsSuite extends SparkFunSuite { @@ -31,14 +31,14 @@ class HistoryServerArgumentsSuite extends SparkFunSuite { private val conf = new SparkConf() .set(HISTORY_LOG_DIR, logDir.getAbsolutePath) .set(UPDATE_INTERVAL_S, 1L) - 
.set("spark.testing", "true") + .set(IS_TESTING, true) test("No Arguments Parsing") { val argStrings = Array.empty[String] val hsa = new HistoryServerArguments(conf, argStrings) assert(conf.get(HISTORY_LOG_DIR) === logDir.getAbsolutePath) assert(conf.get(UPDATE_INTERVAL_S) === 1L) - assert(conf.get("spark.testing") === "true") + assert(conf.get(IS_TESTING).getOrElse(false)) } test("Properties File Arguments Parsing --properties-file") { diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 96458c55b5f5..bb7d3c52bc9c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -47,6 +47,7 @@ import org.scalatest.selenium.WebBrowser import org.apache.spark._ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.History._ +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.status.api.v1.ApplicationInfo import org.apache.spark.status.api.v1.JobData import org.apache.spark.ui.SparkUI @@ -81,7 +82,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers val conf = new SparkConf() .set(HISTORY_LOG_DIR, logDir) .set(UPDATE_INTERVAL_S.key, "0") - .set("spark.testing", "true") + .set(IS_TESTING, true) .set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) .set(EVENT_LOG_STAGE_EXECUTOR_METRICS, true) .set(EVENT_LOG_PROCESS_TREE_METRICS, true) @@ -400,7 +401,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers */ test("security manager starts with spark.authenticate set") { val conf = new SparkConf() - .set("spark.testing", "true") + .set(IS_TESTING, true) .set(SecurityManager.SPARK_AUTH_CONF, "true") HistoryServer.createSecurityManager(conf) } @@ -422,7 +423,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers .set(UPDATE_INTERVAL_S.key, "1s") .set(EVENT_LOG_ENABLED, true) .set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) - .remove("spark.testing") + .remove(IS_TESTING) val provider = new FsHistoryProvider(myConf) val securityManager = HistoryServer.createSecurityManager(myConf) diff --git a/core/src/test/scala/org/apache/spark/memory/StaticMemoryManagerSuite.scala b/core/src/test/scala/org/apache/spark/memory/StaticMemoryManagerSuite.scala index 0f32fe4059fb..c3275add50f4 100644 --- a/core/src/test/scala/org/apache/spark/memory/StaticMemoryManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/memory/StaticMemoryManagerSuite.scala @@ -21,6 +21,7 @@ import org.mockito.Mockito.when import org.apache.spark.SparkConf import org.apache.spark.internal.config.MEMORY_OFFHEAP_SIZE +import org.apache.spark.internal.config.Tests.TEST_MEMORY import org.apache.spark.storage.TestBlockId import org.apache.spark.storage.memory.MemoryStore @@ -48,8 +49,8 @@ class StaticMemoryManagerSuite extends MemoryManagerSuite { new StaticMemoryManager( conf.clone .set("spark.memory.fraction", "1") - .set("spark.testing.memory", maxOnHeapExecutionMemory.toString) - .set(MEMORY_OFFHEAP_SIZE.key, maxOffHeapExecutionMemory.toString), + .set(TEST_MEMORY, maxOnHeapExecutionMemory) + .set(MEMORY_OFFHEAP_SIZE, maxOffHeapExecutionMemory), maxOnHeapExecutionMemory = maxOnHeapExecutionMemory, maxOnHeapStorageMemory = 0, numCores = 1) diff --git a/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala 
b/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala index 5ce3453b682f..8556e920daeb 100644 --- a/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala @@ -21,6 +21,7 @@ import org.scalatest.PrivateMethodTester import org.apache.spark.SparkConf import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests._ import org.apache.spark.storage.TestBlockId import org.apache.spark.storage.memory.MemoryStore @@ -43,8 +44,8 @@ class UnifiedMemoryManagerSuite extends MemoryManagerSuite with PrivateMethodTes maxOffHeapExecutionMemory: Long): UnifiedMemoryManager = { val conf = new SparkConf() .set("spark.memory.fraction", "1") - .set("spark.testing.memory", maxOnHeapExecutionMemory.toString) - .set(MEMORY_OFFHEAP_SIZE.key, maxOffHeapExecutionMemory.toString) + .set(TEST_MEMORY, maxOnHeapExecutionMemory) + .set(MEMORY_OFFHEAP_SIZE, maxOffHeapExecutionMemory) .set("spark.memory.storageFraction", storageFraction.toString) UnifiedMemoryManager(conf, numCores = 1) } @@ -218,19 +219,19 @@ class UnifiedMemoryManagerSuite extends MemoryManagerSuite with PrivateMethodTes } test("small heap") { - val systemMemory = 1024 * 1024 - val reservedMemory = 300 * 1024 + val systemMemory = 1024L * 1024 + val reservedMemory = 300L * 1024 val memoryFraction = 0.8 val conf = new SparkConf() .set("spark.memory.fraction", memoryFraction.toString) - .set("spark.testing.memory", systemMemory.toString) - .set("spark.testing.reservedMemory", reservedMemory.toString) + .set(TEST_MEMORY, systemMemory) + .set(TEST_RESERVED_MEMORY, reservedMemory) val mm = UnifiedMemoryManager(conf, numCores = 1) val expectedMaxMemory = ((systemMemory - reservedMemory) * memoryFraction).toLong assert(mm.maxHeapMemory === expectedMaxMemory) // Try using a system memory that's too small - val conf2 = conf.clone().set("spark.testing.memory", (reservedMemory / 2).toString) + val conf2 = conf.clone().set(TEST_MEMORY, reservedMemory / 2) val exception = intercept[IllegalArgumentException] { UnifiedMemoryManager(conf2, numCores = 1) } @@ -238,13 +239,13 @@ class UnifiedMemoryManagerSuite extends MemoryManagerSuite with PrivateMethodTes } test("insufficient executor memory") { - val systemMemory = 1024 * 1024 - val reservedMemory = 300 * 1024 + val systemMemory = 1024L * 1024 + val reservedMemory = 300L * 1024 val memoryFraction = 0.8 val conf = new SparkConf() .set("spark.memory.fraction", memoryFraction.toString) - .set("spark.testing.memory", systemMemory.toString) - .set("spark.testing.reservedMemory", reservedMemory.toString) + .set(TEST_MEMORY, systemMemory) + .set(TEST_RESERVED_MEMORY, reservedMemory) val mm = UnifiedMemoryManager(conf, numCores = 1) // Try using an executor memory that's too small @@ -259,7 +260,7 @@ class UnifiedMemoryManagerSuite extends MemoryManagerSuite with PrivateMethodTes val conf = new SparkConf() .set("spark.memory.fraction", "1") .set("spark.memory.storageFraction", "0") - .set("spark.testing.memory", "1000") + .set(TEST_MEMORY, 1000L) val mm = UnifiedMemoryManager(conf, numCores = 2) val ms = makeMemoryStore(mm) val memoryMode = MemoryMode.ON_HEAP @@ -285,7 +286,7 @@ class UnifiedMemoryManagerSuite extends MemoryManagerSuite with PrivateMethodTes val conf = new SparkConf() .set("spark.memory.fraction", "1") .set("spark.memory.storageFraction", "0") - .set("spark.testing.memory", "1000") + .set(TEST_MEMORY, 1000L) val mm = UnifiedMemoryManager(conf, numCores = 2) 
makeBadMemoryStore(mm) val memoryMode = MemoryMode.ON_HEAP @@ -306,9 +307,9 @@ class UnifiedMemoryManagerSuite extends MemoryManagerSuite with PrivateMethodTes test("not enough free memory in the storage pool --OFF_HEAP") { val conf = new SparkConf() - .set(MEMORY_OFFHEAP_SIZE.key, "1000") - .set("spark.testing.memory", "1000") - .set(MEMORY_OFFHEAP_ENABLED.key, "true") + .set(MEMORY_OFFHEAP_SIZE, 1000L) + .set(TEST_MEMORY, 1000L) + .set(MEMORY_OFFHEAP_ENABLED, true) val taskAttemptId = 0L val mm = UnifiedMemoryManager(conf, numCores = 1) val ms = makeMemoryStore(mm) diff --git a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala index 36dd620a5685..112fd31a060e 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.scheduler import scala.util.Random import org.apache.spark._ +import org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { @@ -76,7 +77,7 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { test("throw exception on barrier() call timeout") { val conf = new SparkConf() .set("spark.barrier.sync.timeout", "1") - .set("spark.test.noStageRetry", "true") + .set(TEST_NO_STAGE_RETRY, true) .setMaster("local-cluster[4, 1, 1024]") .setAppName("test-cluster") sc = new SparkContext(conf) @@ -101,7 +102,7 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { test("throw exception if barrier() call doesn't happen on every task") { val conf = new SparkConf() .set("spark.barrier.sync.timeout", "1") - .set("spark.test.noStageRetry", "true") + .set(TEST_NO_STAGE_RETRY, true) .setMaster("local-cluster[4, 1, 1024]") .setAppName("test-cluster") sc = new SparkContext(conf) @@ -124,7 +125,7 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { test("throw exception if the number of barrier() calls are not the same on every task") { val conf = new SparkConf() .set("spark.barrier.sync.timeout", "1") - .set("spark.test.noStageRetry", "true") + .set(TEST_NO_STAGE_RETRY, true) .setMaster("local-cluster[4, 1, 1024]") .setAppName("test-cluster") sc = new SparkContext(conf) diff --git a/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala index 29bb8232f44f..2215f7f36621 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala @@ -20,6 +20,7 @@ import scala.concurrent.duration._ import org.apache.spark._ import org.apache.spark.internal.config +import org.apache.spark.internal.config.Tests._ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorMockBackend]{ @@ -58,9 +59,9 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM extraConfs = Seq( config.BLACKLIST_ENABLED.key -> "true", config.MAX_TASK_FAILURES.key -> "4", - "spark.testing.nHosts" -> "2", - "spark.testing.nExecutorsPerHost" -> "5", - "spark.testing.nCoresPerExecutor" -> "10" + TEST_N_HOSTS.key -> "2", + TEST_N_EXECUTORS_HOST.key -> "5", + TEST_N_CORES_EXECUTOR.key -> "10" ) ) { // To reliably reproduce the failure that would occur without 
blacklisting, we have to use 1 @@ -102,9 +103,9 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM "SPARK-15865 Progress with fewer executors than maxTaskFailures", extraConfs = Seq( config.BLACKLIST_ENABLED.key -> "true", - "spark.testing.nHosts" -> "2", - "spark.testing.nExecutorsPerHost" -> "1", - "spark.testing.nCoresPerExecutor" -> "1", + TEST_N_HOSTS.key -> "2", + TEST_N_EXECUTORS_HOST.key -> "1", + TEST_N_CORES_EXECUTOR.key -> "1", "spark.scheduler.blacklist.unschedulableTaskSetTimeout" -> "0s" ) ) { @@ -129,9 +130,9 @@ class MultiExecutorMockBackend( conf: SparkConf, taskScheduler: TaskSchedulerImpl) extends MockBackend(conf, taskScheduler) { - val nHosts = conf.getInt("spark.testing.nHosts", 5) - val nExecutorsPerHost = conf.getInt("spark.testing.nExecutorsPerHost", 4) - val nCoresPerExecutor = conf.getInt("spark.testing.nCoresPerExecutor", 2) + val nHosts = conf.get(TEST_N_HOSTS) + val nExecutorsPerHost = conf.get(TEST_N_EXECUTORS_HOST) + val nCoresPerExecutor = conf.get(TEST_N_CORES_EXECUTOR) override val executorIdToExecutor: Map[String, ExecutorTaskStatus] = { (0 until nHosts).flatMap { hostIdx => diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/ShuffleExternalSorterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/ShuffleExternalSorterSuite.scala index b9f0e873375b..43621cb85762 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/ShuffleExternalSorterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/ShuffleExternalSorterSuite.scala @@ -24,6 +24,7 @@ import org.scalatest.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.executor.{ShuffleWriteMetrics, TaskMetrics} +import org.apache.spark.internal.config.Tests._ import org.apache.spark.memory._ import org.apache.spark.unsafe.Platform @@ -33,8 +34,8 @@ class ShuffleExternalSorterSuite extends SparkFunSuite with LocalSparkContext wi val conf = new SparkConf() .setMaster("local[1]") .setAppName("ShuffleExternalSorterSuite") - .set("spark.testing", "true") - .set("spark.testing.memory", "1600") + .set(IS_TESTING, true) + .set(TEST_MEMORY, 1600L) .set("spark.memory.fraction", "1") sc = new SparkContext(conf) diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala index 19116cf22d2f..480e07fb9399 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala @@ -32,6 +32,7 @@ import org.apache.spark._ import org.apache.spark.broadcast.BroadcastManager import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{DRIVER_PORT, MEMORY_OFFHEAP_SIZE} +import org.apache.spark.internal.config.Tests._ import org.apache.spark.memory.UnifiedMemoryManager import org.apache.spark.network.BlockTransferService import org.apache.spark.network.netty.NettyBlockTransferService @@ -69,8 +70,8 @@ trait BlockManagerReplicationBehavior extends SparkFunSuite protected def makeBlockManager( maxMem: Long, name: String = SparkContext.DRIVER_IDENTIFIER): BlockManager = { - conf.set("spark.testing.memory", maxMem.toString) - conf.set(MEMORY_OFFHEAP_SIZE.key, maxMem.toString) + conf.set(TEST_MEMORY, maxMem) + conf.set(MEMORY_OFFHEAP_SIZE, maxMem) val transfer = new NettyBlockTransferService(conf, securityMgr, "localhost", "localhost", 0, 1) val memManager = UnifiedMemoryManager(conf, numCores = 1) val 
serializerManager = new SerializerManager(serializer, conf) @@ -87,7 +88,7 @@ trait BlockManagerReplicationBehavior extends SparkFunSuite conf.set("spark.authenticate", "false") conf.set(DRIVER_PORT, rpcEnv.address.port) - conf.set("spark.testing", "true") + conf.set(IS_TESTING, true) conf.set("spark.memory.fraction", "1") conf.set("spark.memory.storageFraction", "1") conf.set("spark.storage.unrollFraction", "0.4") @@ -233,7 +234,7 @@ trait BlockManagerReplicationBehavior extends SparkFunSuite val failableTransfer = mock(classOf[BlockTransferService]) // this wont actually work when(failableTransfer.hostName).thenReturn("some-hostname") when(failableTransfer.port).thenReturn(1000) - conf.set("spark.testing.memory", "10000") + conf.set(TEST_MEMORY, 10000L) val memManager = UnifiedMemoryManager(conf, numCores = 1) val serializerManager = new SerializerManager(serializer, conf) val failableStore = new BlockManager("failable-store", rpcEnv, master, serializerManager, conf, diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index a7bb2a03360a..bda81365b079 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -37,6 +37,7 @@ import org.apache.spark._ import org.apache.spark.broadcast.BroadcastManager import org.apache.spark.executor.DataReadMethod import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests._ import org.apache.spark.memory.UnifiedMemoryManager import org.apache.spark.network.{BlockDataManager, BlockTransferService, TransportContext} import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} @@ -89,8 +90,8 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE transferService: Option[BlockTransferService] = Option.empty, testConf: Option[SparkConf] = None): BlockManager = { val bmConf = testConf.map(_.setAll(conf.getAll)).getOrElse(conf) - bmConf.set("spark.testing.memory", maxMem.toString) - bmConf.set(MEMORY_OFFHEAP_SIZE.key, maxMem.toString) + bmConf.set(TEST_MEMORY, maxMem) + bmConf.set(MEMORY_OFFHEAP_SIZE, maxMem) val serializer = new KryoSerializer(bmConf) val encryptionKey = if (bmConf.get(IO_ENCRYPTION_ENABLED)) { Some(CryptoStreamUtils.createKey(bmConf)) @@ -115,11 +116,10 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE System.setProperty("os.arch", "amd64") conf = new SparkConf(false) .set("spark.app.id", "test") - .set("spark.testing", "true") + .set(IS_TESTING, true) .set("spark.memory.fraction", "1") .set("spark.memory.storageFraction", "1") .set("spark.kryoserializer.buffer", "1m") - .set("spark.test.useCompressedOops", "true") .set("spark.storage.unrollFraction", "0.4") .set("spark.storage.unrollMemoryThreshold", "512") @@ -901,7 +901,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE test("block store put failure") { // Use Java serializer so we can create an unserializable error. 
- conf.set("spark.testing.memory", "1200") + conf.set(TEST_MEMORY, 1200L) val transfer = new NettyBlockTransferService(conf, securityMgr, "localhost", "localhost", 0, 1) val memoryManager = UnifiedMemoryManager(conf, numCores = 1) val serializerManager = new SerializerManager(new JavaSerializer(conf), conf) diff --git a/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala b/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala index baff672f5fb8..b02af2bfe7ac 100644 --- a/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala @@ -39,7 +39,6 @@ class MemoryStoreSuite with ResetSystemProperties { var conf: SparkConf = new SparkConf(false) - .set("spark.test.useCompressedOops", "true") .set("spark.storage.unrollFraction", "0.4") .set("spark.storage.unrollMemoryThreshold", "512") diff --git a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala index 63f9f82adf3e..8bc62db81e4f 100644 --- a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala @@ -22,6 +22,7 @@ import scala.collection.mutable.ArrayBuffer import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} import org.apache.spark.SparkFunSuite +import org.apache.spark.internal.config.Tests.TEST_USE_COMPRESSED_OOPS_KEY class DummyClass1 {} @@ -76,7 +77,7 @@ class SizeEstimatorSuite // Set the arch to 64-bit and compressedOops to true to get a deterministic test-case super.beforeEach() System.setProperty("os.arch", "amd64") - System.setProperty("spark.test.useCompressedOops", "true") + System.setProperty(TEST_USE_COMPRESSED_OOPS_KEY, "true") } override def afterEach(): Unit = { @@ -192,7 +193,7 @@ class SizeEstimatorSuite // (Sun vs IBM). Use a DummyString class to make tests deterministic. 
test("64-bit arch with no compressed oops") { System.setProperty("os.arch", "amd64") - System.setProperty("spark.test.useCompressedOops", "false") + System.setProperty(TEST_USE_COMPRESSED_OOPS_KEY, "false") val initialize = PrivateMethod[Unit]('initialize) SizeEstimator invokePrivate initialize() diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala index 35fba1a3b73c..6211399005e1 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala @@ -27,6 +27,7 @@ import org.scalatest.concurrent.Eventually import org.apache.spark._ import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests.TEST_MEMORY import org.apache.spark.io.CompressionCodec import org.apache.spark.memory.MemoryTestingUtils import org.apache.spark.util.CompletionIterator @@ -552,7 +553,7 @@ class ExternalAppendOnlyMapSuite extends SparkFunSuite val conf = createSparkConf(loadDefaults = false) .set("spark.shuffle.memoryFraction", "0.01") .set("spark.memory.useLegacyMode", "true") - .set("spark.testing.memory", "100000000") + .set(TEST_MEMORY, 100000000L) .set("spark.shuffle.sort.bypassMergeThreshold", "0") sc = new SparkContext("local", "test", conf) val N = 2e5.toInt diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala index 47173b89e91e..aa400dd74e9c 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala @@ -23,6 +23,7 @@ import scala.collection.mutable.ArrayBuffer import scala.util.Random import org.apache.spark._ +import org.apache.spark.internal.config.Tests.TEST_MEMORY import org.apache.spark.memory.MemoryTestingUtils import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.unsafe.array.LongArray @@ -639,7 +640,7 @@ class ExternalSorterSuite extends SparkFunSuite with LocalSparkContext { val conf = createSparkConf(loadDefaults = false, kryo = false) .set("spark.shuffle.memoryFraction", "0.01") .set("spark.memory.useLegacyMode", "true") - .set("spark.testing.memory", "100000000") + .set(TEST_MEMORY, 100000000L) .set("spark.shuffle.sort.bypassMergeThreshold", "0") sc = new SparkContext("local", "test", conf) val N = 2e5.toInt diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala index c0b435efb8c9..cc8968394901 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala @@ -27,6 +27,7 @@ import org.scalatest.concurrent.Eventually import org.apache.spark.deploy.k8s.integrationtest.TestConstants._ import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.Tests.IS_TESTING private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesClient) { @@ -67,7 +68,7 @@ 
private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesCl .set("spark.executors.instances", "1") .set("spark.app.name", "spark-test-app") .set("spark.ui.enabled", "true") - .set("spark.testing", "false") + .set(IS_TESTING, false) .set("spark.kubernetes.submission.waitAppCompletion", "false") .set("spark.kubernetes.authenticate.driver.serviceAccountName", serviceAccountName) } diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index 03cd2583b9b2..fb235350700f 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -33,6 +33,7 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkContext, SparkExceptio import org.apache.spark.deploy.mesos.config._ import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.internal.config +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.mesos.MesosExternalShuffleClient @@ -298,7 +299,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( } protected def driverURL: String = { - if (conf.contains("spark.testing")) { + if (conf.contains(IS_TESTING)) { "driverURL" } else { RpcEndpointAddress( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala index dda7cb55f539..5b38fe5c46bb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicLong -import org.apache.spark.SparkContext +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart} @@ -38,7 +38,7 @@ object SQLExecution { executionIdToQueryExecution.get(executionId) } - private val testing = sys.props.contains("spark.testing") + private val testing = sys.props.contains(IS_TESTING.key) private[sql] def checkSQLExecutionId(sparkSession: SparkSession): Unit = { val sc = sparkSession.sparkContext diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala index d95794d62403..c37d663941d8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import org.scalatest.BeforeAndAfterAll +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec} @@ -29,7 +30,7 @@ abstract class BenchmarkQueryTest extends QueryTest with 
SharedSQLContext with B // When Utils.isTesting is true, the RuleExecutor will issue an exception when hitting // the max iteration of analyzer/optimizer batches. - assert(Utils.isTesting, "spark.testing is not set to true") + assert(Utils.isTesting, s"${IS_TESTING.key} is not set to true") /** * Drop all the tables diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala index ca8692290edb..963e42517b44 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala @@ -21,6 +21,7 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} import java.util.Properties import org.apache.spark._ +import org.apache.spark.internal.config.Tests.TEST_MEMORY import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.rdd.RDD import org.apache.spark.sql.{LocalSparkSession, Row, SparkSession} @@ -99,7 +100,7 @@ class UnsafeRowSerializerSuite extends SparkFunSuite with LocalSparkSession { val conf = new SparkConf() .set("spark.shuffle.spill.initialMemoryThreshold", "1") .set("spark.shuffle.sort.bypassMergeThreshold", "0") - .set("spark.testing.memory", "80000") + .set(TEST_MEMORY, 80000L) spark = SparkSession.builder().master("local").appName("test").config(conf).getOrCreate() val outputFile = File.createTempFile("test-unsafe-row-serializer-spill", "") outputFile.deleteOnExit() From 5102ccc4ab6e30caa5510131dee7098b4f3ad32e Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 7 Jan 2019 15:48:54 -0800 Subject: [PATCH 0249/1072] [SPARK-26339][SQL][FOLLOW-UP] Issue warning instead of throwing an exception for underscore files ## What changes were proposed in this pull request? The PR https://github.com/apache/spark/pull/23446 happened to introduce a behaviour change - empty dataframes can't be read anymore from underscore files. It looks controversial to allow or disallow this case so this PR targets to fix to issue warning instead of throwing an exception to be more conservative. **Before** ```scala scala> spark.read.schema("a int").parquet("_tmp*").show() org.apache.spark.sql.AnalysisException: All paths were ignored: file:/.../_tmp file:/.../_tmp1; at org.apache.spark.sql.execution.datasources.DataSource.checkAndGlobPathIfNecessary(DataSource.scala:570) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:360) at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:231) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:219) at org.apache.spark.sql.DataFrameReader.parquet(DataFrameReader.scala:651) at org.apache.spark.sql.DataFrameReader.parquet(DataFrameReader.scala:635) ... 49 elided scala> spark.read.text("_tmp*").show() org.apache.spark.sql.AnalysisException: All paths were ignored: file:/.../_tmp file:/.../_tmp1; at org.apache.spark.sql.execution.datasources.DataSource.checkAndGlobPathIfNecessary(DataSource.scala:570) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:360) at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:231) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:219) at org.apache.spark.sql.DataFrameReader.text(DataFrameReader.scala:723) at org.apache.spark.sql.DataFrameReader.text(DataFrameReader.scala:695) ... 
49 elided ``` **After** ```scala scala> spark.read.schema("a int").parquet("_tmp*").show() 19/01/07 15:14:43 WARN DataSource: All paths were ignored: file:/.../_tmp file:/.../_tmp1 +---+ | a| +---+ +---+ scala> spark.read.text("_tmp*").show() 19/01/07 15:14:51 WARN DataSource: All paths were ignored: file:/.../_tmp file:/.../_tmp1 +-----+ |value| +-----+ +-----+ ``` ## How was this patch tested? Manually tested as above. Closes #23481 from HyukjinKwon/SPARK-26339. Authored-by: Hyukjin Kwon Signed-off-by: gatorsmile --- .../execution/datasources/DataSource.scala | 6 +++--- .../src/test/resources/test-data/_cars.csv | 7 ------- .../execution/datasources/csv/CSVSuite.scala | 20 ------------------- 3 files changed, 3 insertions(+), 30 deletions(-) delete mode 100644 sql/core/src/test/resources/test-data/_cars.csv diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 2a438a5cbf95..5dad784e45af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -567,11 +567,11 @@ case class DataSource( } if (filteredOut.nonEmpty) { if (filteredIn.isEmpty) { - throw new AnalysisException( - s"All paths were ignored:\n${filteredOut.mkString("\n ")}") + logWarning( + s"All paths were ignored:\n ${filteredOut.mkString("\n ")}") } else { logDebug( - s"Some paths were ignored:\n${filteredOut.mkString("\n ")}") + s"Some paths were ignored:\n ${filteredOut.mkString("\n ")}") } } } diff --git a/sql/core/src/test/resources/test-data/_cars.csv b/sql/core/src/test/resources/test-data/_cars.csv deleted file mode 100644 index 40ded573ade5..000000000000 --- a/sql/core/src/test/resources/test-data/_cars.csv +++ /dev/null @@ -1,7 +0,0 @@ - -year,make,model,comment,blank -"2012","Tesla","S","No comment", - -1997,Ford,E350,"Go get one now they are going fast", -2015,Chevy,Volt - diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index fb1bedfaa32c..d9e5d7af1967 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -53,7 +53,6 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te private val carsEmptyValueFile = "test-data/cars-empty-value.csv" private val carsBlankColName = "test-data/cars-blank-column-name.csv" private val carsCrlf = "test-data/cars-crlf.csv" - private val carsFilteredOutFile = "test-data/_cars.csv" private val emptyFile = "test-data/empty.csv" private val commentsFile = "test-data/comments.csv" private val disableCommentsFile = "test-data/disable_comments.csv" @@ -347,25 +346,6 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te assert(result.schema.fieldNames.size === 1) } - test("SPARK-26339 Not throw an exception if some of specified paths are filtered in") { - val cars = spark - .read - .option("header", "false") - .csv(testFile(carsFile), testFile(carsFilteredOutFile)) - - verifyCars(cars, withHeader = false, checkTypes = false) - } - - test("SPARK-26339 Throw an exception only if all of the specified paths are filtered out") { - val e = intercept[AnalysisException] { - val cars = spark - .read - 
.option("header", "false") - .csv(testFile(carsFilteredOutFile)) - }.getMessage - assert(e.contains("All paths were ignored:")) - } - test("DDL test with empty file") { withView("carsTable") { spark.sql( From 5fb5a0292d9ced48860abe712a10cbb8e513b75a Mon Sep 17 00:00:00 2001 From: Adrian Tanase Date: Mon, 7 Jan 2019 19:03:38 -0600 Subject: [PATCH 0250/1072] [MINOR][K8S] add missing docs for podTemplateContainerName properties ## What changes were proposed in this pull request? Adding docs for an enhancement that came in late in this PR: #22146 Currently the docs state that we're going to use the first container in a pod template, which was the implementation for some time, until it was improved with 2 new properties. ## How was this patch tested? I tested that the properties work by combining pod templates with client-mode and a simple pod template. Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23155 from aditanase/k8s-readme. Authored-by: Adrian Tanase Signed-off-by: Sean Owen --- docs/running-on-kubernetes.md | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 3172b1bca8f0..3453ee912205 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -229,8 +229,11 @@ pod template that will always be overwritten by Spark. Therefore, users of this the pod template file only lets Spark start with a template pod instead of an empty pod during the pod-building process. For details, see the [full list](#pod-template-properties) of pod template values that will be overwritten by spark. -Pod template files can also define multiple containers. In such cases, Spark will always assume that the first container in -the list will be the driver or executor container. +Pod template files can also define multiple containers. In such cases, you can use the spark properties +`spark.kubernetes.driver.podTemplateContainerName` and `spark.kubernetes.executor.podTemplateContainerName` +to indicate which container should be used as a basis for the driver or executor. +If not specified, or if the container name is not valid, Spark will assume that the first container in the list +will be the driver or executor container. ## Using Kubernetes Volumes @@ -932,16 +935,32 @@ specific to Spark on Kubernetes. spark.kubernetes.driver.podTemplateFile (none) - Specify the local file that contains the driver [pod template](#pod-template). For example - spark.kubernetes.driver.podTemplateFile=/path/to/driver-pod-template.yaml` + Specify the local file that contains the driver pod template. For example + spark.kubernetes.driver.podTemplateFile=/path/to/driver-pod-template.yaml + + + + spark.kubernetes.driver.podTemplateContainerName + (none) + + Specify the container name to be used as a basis for the driver in the given pod template. + For example spark.kubernetes.driver.podTemplateContainerName=spark-driver spark.kubernetes.executor.podTemplateFile (none) - Specify the local file that contains the executor [pod template](#pod-template). For example - spark.kubernetes.executor.podTemplateFile=/path/to/executor-pod-template.yaml` + Specify the local file that contains the executor pod template. For example + spark.kubernetes.executor.podTemplateFile=/path/to/executor-pod-template.yaml + + + + spark.kubernetes.executor.podTemplateContainerName + (none) + + Specify the container name to be used as a basis for the executor in the given pod template. 
+ For example spark.kubernetes.executor.podTemplateContainerName=spark-executor From 6f35ede31cc72a81e3852b1ac7454589d1897bfc Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 7 Jan 2019 17:54:05 -0800 Subject: [PATCH 0251/1072] [SPARK-26554][BUILD][FOLLOWUP] Use GitHub instead of GitBox to check HEADER ## What changes were proposed in this pull request? This PR uses GitHub repository instead of GitBox because GitHub repo returns HTTP header status correctly. ## How was this patch tested? Manual. ``` $ ./do-release-docker.sh -d /tmp/test -n Branch [branch-2.4]: Current branch version is 2.4.1-SNAPSHOT. Release [2.4.1]: RC # [1]: This is a dry run. Please confirm the ref that will be built for testing. Ref [v2.4.1-rc1]: ``` Closes #23482 from dongjoon-hyun/SPARK-26554-2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/create-release/release-util.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dev/create-release/release-util.sh b/dev/create-release/release-util.sh index 9a340528b506..5486c18e95bc 100755 --- a/dev/create-release/release-util.sh +++ b/dev/create-release/release-util.sh @@ -21,6 +21,7 @@ DRY_RUN=${DRY_RUN:-0} GPG="gpg --no-tty --batch" ASF_REPO="https://gitbox.apache.org/repos/asf/spark.git" ASF_REPO_WEBUI="https://gitbox.apache.org/repos/asf?p=spark.git" +ASF_GITHUB_REPO="https://github.com/apache/spark" function error { echo "$*" @@ -73,9 +74,7 @@ function fcreate_secure { } function check_for_tag { - # Check HTML body messages instead of header status codes. Apache GitBox returns - # a header with `200 OK` status code for both existing and non-existing tag URLs - ! curl -s --fail "$ASF_REPO_WEBUI;a=commit;h=$1" | grep '404 Not Found' > /dev/null + curl -s --head --fail "$ASF_GITHUB_REPO/releases/tag/$1" > /dev/null } function get_release_info { From 29a7d2da44585d91a9e94bf88dc7b1f42a0e5674 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 7 Jan 2019 18:59:43 -0800 Subject: [PATCH 0252/1072] [SPARK-24196][SQL] Implement Spark's own GetSchemasOperation ## What changes were proposed in this pull request? This PR fix SQL Client tools can't show DBs by implementing Spark's own `GetSchemasOperation`. ## How was this patch tested? unit tests and manual tests ![image](https://user-images.githubusercontent.com/5399861/47782885-3dd5d400-dd3c-11e8-8586-59a8c15c7020.png) ![image](https://user-images.githubusercontent.com/5399861/47782899-4928ff80-dd3c-11e8-9d2d-ba9580ba4301.png) Closes #22903 from wangyum/SPARK-24196. 
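As a quick sanity check of what this change enables, the sketch below is not part of the patch; it assumes a running Thrift server on the default `localhost:10000` endpoint, an anonymous user, and the Hive JDBC driver on the classpath. It lists databases through plain JDBC metadata calls, which the server now answers via `SparkGetSchemasOperation`:

```scala
import java.sql.DriverManager

// Hypothetical client-side check (endpoint and credentials are assumptions):
// DatabaseMetaData.getSchemas is now served by SparkGetSchemasOperation.
Class.forName("org.apache.hive.jdbc.HiveDriver")
val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000", "anonymous", "")
try {
  val rs = conn.getMetaData.getSchemas()
  while (rs.next()) {
    // TABLE_SCHEM is the database name column defined by GetSchemasOperation
    println(rs.getString("TABLE_SCHEM"))
  }
} finally {
  conn.close()
}
```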
Authored-by: Yuming Wang Signed-off-by: gatorsmile --- .../cli/operation/GetSchemasOperation.java | 2 +- .../SparkGetSchemasOperation.scala | 66 +++++++++++ .../server/SparkSQLOperationManager.scala | 17 ++- .../HiveThriftServer2Suites.scala | 16 +++ .../SparkMetadataOperationSuite.scala | 103 ++++++++++++++++++ 5 files changed, 201 insertions(+), 3 deletions(-) create mode 100644 sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala create mode 100644 sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java index d6f6280f1c39..3516bc2ba242 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java @@ -41,7 +41,7 @@ public class GetSchemasOperation extends MetadataOperation { .addStringColumn("TABLE_SCHEM", "Schema name.") .addStringColumn("TABLE_CATALOG", "Catalog name."); - private RowSet rowSet; + protected RowSet rowSet; protected GetSchemasOperation(HiveSession parentSession, String catalogName, String schemaName) { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala new file mode 100644 index 000000000000..d585049c28e3 --- /dev/null +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver + +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType +import org.apache.hive.service.cli._ +import org.apache.hive.service.cli.operation.GetSchemasOperation +import org.apache.hive.service.cli.operation.MetadataOperation.DEFAULT_HIVE_CATALOG +import org.apache.hive.service.cli.session.HiveSession + +import org.apache.spark.sql.SQLContext + +/** + * Spark's own GetSchemasOperation + * + * @param sqlContext SQLContext to use + * @param parentSession a HiveSession from SessionManager + * @param catalogName catalog name. null if not applicable. 
+ * @param schemaName database name, null or a concrete database name + */ +private[hive] class SparkGetSchemasOperation( + sqlContext: SQLContext, + parentSession: HiveSession, + catalogName: String, + schemaName: String) + extends GetSchemasOperation(parentSession, catalogName, schemaName) { + + override def runInternal(): Unit = { + setState(OperationState.RUNNING) + // Always use the latest class loader provided by executionHive's state. + val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader + Thread.currentThread().setContextClassLoader(executionHiveClassLoader) + + if (isAuthV2Enabled) { + val cmdStr = s"catalog : $catalogName, schemaPattern : $schemaName" + authorizeMetaGets(HiveOperationType.GET_TABLES, null, cmdStr) + } + + try { + val schemaPattern = convertSchemaPattern(schemaName) + sqlContext.sessionState.catalog.listDatabases(schemaPattern).foreach { dbName => + rowSet.addRow(Array[AnyRef](dbName, DEFAULT_HIVE_CATALOG)) + } + setState(OperationState.FINISHED) + } catch { + case e: HiveSQLException => + setState(OperationState.ERROR) + throw e + } + } +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala index bf7c01f60fb5..85b6c7134755 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala @@ -21,13 +21,13 @@ import java.util.{Map => JMap} import java.util.concurrent.ConcurrentHashMap import org.apache.hive.service.cli._ -import org.apache.hive.service.cli.operation.{ExecuteStatementOperation, Operation, OperationManager} +import org.apache.hive.service.cli.operation.{ExecuteStatementOperation, GetSchemasOperation, Operation, OperationManager} import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.HiveUtils -import org.apache.spark.sql.hive.thriftserver.{ReflectionUtils, SparkExecuteStatementOperation} +import org.apache.spark.sql.hive.thriftserver.{ReflectionUtils, SparkExecuteStatementOperation, SparkGetSchemasOperation} import org.apache.spark.sql.internal.SQLConf /** @@ -63,6 +63,19 @@ private[thriftserver] class SparkSQLOperationManager() operation } + override def newGetSchemasOperation( + parentSession: HiveSession, + catalogName: String, + schemaName: String): GetSchemasOperation = synchronized { + val sqlContext = sessionToContexts.get(parentSession.getSessionHandle) + require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" + + " initialized or had already closed.") + val operation = new SparkGetSchemasOperation(sqlContext, parentSession, catalogName, schemaName) + handleToOperation.put(operation.getHandle, operation) + logDebug(s"Created GetSchemasOperation with session=$parentSession.") + operation + } + def setConfMap(conf: SQLConf, confMap: java.util.Map[String, String]): Unit = { val iterator = confMap.entrySet().iterator() while (iterator.hasNext) { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 70eb28cdd0c6..f9509aed4aaa 100644 --- 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -818,6 +818,22 @@ abstract class HiveThriftJdbcTest extends HiveThriftServer2Test { } } + def withDatabase(dbNames: String*)(fs: (Statement => Unit)*) { + val user = System.getProperty("user.name") + val connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") } + val statements = connections.map(_.createStatement()) + + try { + statements.zip(fs).foreach { case (s, f) => f(s) } + } finally { + dbNames.foreach { name => + statements(0).execute(s"DROP DATABASE IF EXISTS $name") + } + statements.foreach(_.close()) + connections.foreach(_.close()) + } + } + def withJdbcStatement(tableNames: String*)(f: Statement => Unit) { withMultipleConnectionJdbcStatement(tableNames: _*)(f) } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala new file mode 100644 index 000000000000..9a997ae01df9 --- /dev/null +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.thriftserver + +import java.util.Properties + +import org.apache.hive.jdbc.{HiveConnection, HiveQueryResultSet, Utils => JdbcUtils} +import org.apache.hive.service.auth.PlainSaslHelper +import org.apache.hive.service.cli.thrift._ +import org.apache.thrift.protocol.TBinaryProtocol +import org.apache.thrift.transport.TSocket + +class SparkMetadataOperationSuite extends HiveThriftJdbcTest { + + override def mode: ServerMode.Value = ServerMode.binary + + test("Spark's own GetSchemasOperation(SparkGetSchemasOperation)") { + def testGetSchemasOperation( + catalog: String, + schemaPattern: String)(f: HiveQueryResultSet => Unit): Unit = { + val rawTransport = new TSocket("localhost", serverPort) + val connection = new HiveConnection(s"jdbc:hive2://localhost:$serverPort", new Properties) + val user = System.getProperty("user.name") + val transport = PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport) + val client = new TCLIService.Client(new TBinaryProtocol(transport)) + transport.open() + var rs: HiveQueryResultSet = null + try { + val openResp = client.OpenSession(new TOpenSessionReq) + val sessHandle = openResp.getSessionHandle + val schemaReq = new TGetSchemasReq(sessHandle) + + if (catalog != null) { + schemaReq.setCatalogName(catalog) + } + + if (schemaPattern == null) { + schemaReq.setSchemaName("%") + } else { + schemaReq.setSchemaName(schemaPattern) + } + + val schemaResp = client.GetSchemas(schemaReq) + JdbcUtils.verifySuccess(schemaResp.getStatus) + + rs = new HiveQueryResultSet.Builder(connection) + .setClient(client) + .setSessionHandle(sessHandle) + .setStmtHandle(schemaResp.getOperationHandle) + .build() + f(rs) + } finally { + rs.close() + connection.close() + transport.close() + rawTransport.close() + } + } + + def checkResult(dbNames: Seq[String], rs: HiveQueryResultSet): Unit = { + if (dbNames.nonEmpty) { + for (i <- dbNames.indices) { + assert(rs.next()) + assert(rs.getString("TABLE_SCHEM") === dbNames(i)) + } + } else { + assert(!rs.next()) + } + } + + withDatabase("db1", "db2") { statement => + Seq("CREATE DATABASE db1", "CREATE DATABASE db2").foreach(statement.execute) + + testGetSchemasOperation(null, "%") { rs => + checkResult(Seq("db1", "db2"), rs) + } + testGetSchemasOperation(null, "db1") { rs => + checkResult(Seq("db1"), rs) + } + testGetSchemasOperation(null, "db_not_exist") { rs => + checkResult(Seq.empty, rs) + } + testGetSchemasOperation(null, "db*") { rs => + checkResult(Seq("db1", "db2"), rs) + } + } + } +} From 72a572ffd6e156243b13f9243ed296f6d77b4241 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 8 Jan 2019 22:44:33 +0800 Subject: [PATCH 0253/1072] [SPARK-26323][SQL] Scala UDF should still check input types even if some inputs are of type Any ## What changes were proposed in this pull request? For Scala UDF, when checking input nullability, we will skip inputs with type `Any`, and only check the inputs that provide nullability info. We should do the same for checking input types. ## How was this patch tested? new tests Closes #23275 from cloud-fan/udf. 
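To make the behaviour change concrete, here is a hedged illustration (not the patch's own test, and it assumes a `spark` session as in `spark-shell`): when one parameter's Scala type cannot be mapped to a Catalyst type, so its expected type is `AnyDataType`, the remaining parameters are still type-checked and coerced instead of all input type checks being skipped for the whole UDF.

```scala
import org.apache.spark.sql.functions.{col, udf}

// Illustrative only: `y: Any` has no derivable Catalyst schema, so it is matched
// against AnyDataType; `x` still gets its input type checked and coerced.
val f = udf((x: Int, y: Any) => x.toString + String.valueOf(y))
spark.range(3).select(f(col("id"), col("id"))).show()
```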
Authored-by: Wenchen Fan Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/analysis/TypeCoercion.scala | 13 +- .../sql/catalyst/expressions/ScalaUDF.scala | 4 +- .../spark/sql/types/AbstractDataType.scala | 2 +- .../apache/spark/sql/UDFRegistration.scala | 216 ++++++++---------- .../sql/expressions/UserDefinedFunction.scala | 57 ++--- .../org/apache/spark/sql/functions.scala | 52 ++--- .../scala/org/apache/spark/sql/UDFSuite.scala | 15 ++ 7 files changed, 175 insertions(+), 184 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index b19aa50ba215..13cc9b9c125e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -882,7 +882,18 @@ object TypeCoercion { case udf: ScalaUDF if udf.inputTypes.nonEmpty => val children = udf.children.zip(udf.inputTypes).map { case (in, expected) => - implicitCast(in, udfInputToCastType(in.dataType, expected)).getOrElse(in) + // Currently Scala UDF will only expect `AnyDataType` at top level, so this trick works. + // In the future we should create types like `AbstractArrayType`, so that Scala UDF can + // accept inputs of array type of arbitrary element type. + if (expected == AnyDataType) { + in + } else { + implicitCast( + in, + udfInputToCastType(in.dataType, expected.asInstanceOf[DataType]) + ).getOrElse(in) + } + } udf.withNewChildren(children) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index a23aaa3a0b3e..fae1119c394b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -21,7 +21,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, ScalaReflection} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.types.{AbstractDataType, DataType} /** * User-defined function. @@ -48,7 +48,7 @@ case class ScalaUDF( dataType: DataType, children: Seq[Expression], inputsNullSafe: Seq[Boolean], - inputTypes: Seq[DataType] = Nil, + inputTypes: Seq[AbstractDataType] = Nil, udfName: Option[String] = None, nullable: Boolean = true, udfDeterministic: Boolean = true) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala index 5367ce2af8e9..d2ef08873187 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala @@ -96,7 +96,7 @@ private[sql] object TypeCollection { /** * An `AbstractDataType` that matches any concrete data types. */ -protected[sql] object AnyDataType extends AbstractDataType { +protected[sql] object AnyDataType extends AbstractDataType with Serializable { // Note that since AnyDataType matches any concrete types, defaultConcreteType should never // be invoked. 
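The `UDFRegistration` hunks that follow are mechanical: every `register[...]` arity is rewritten to build the `UserDefinedFunction` once and delegate to it from the function-registry builder. A condensed sketch of that shared template, with `arity` as a placeholder for the per-overload argument count, taken directly from the hunks below:

```scala
// Shared shape of each register[...] overload after this patch (`arity` is a placeholder).
val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name)
val finalUdf = if (nullable) udf else udf.asNonNullable()
def builder(e: Seq[Expression]) = if (e.length == arity) {
  finalUdf.createScalaUDF(e)
} else {
  throw new AnalysisException("Invalid number of arguments for function " + name +
    ". Expected: " + arity + "; Found: " + e.length)
}
functionRegistry.createOrReplaceTempFunction(name, builder)
finalUdf
```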
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index 5a3f556c9c07..fe5d1afd8478 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -123,17 +123,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends |def register[$typeTags](name: String, func: Function$x[$types]): UserDefinedFunction = { | val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] | val inputSchemas: Seq[Option[ScalaReflection.Schema]] = $inputSchemas + | val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + | val finalUdf = if (nullable) udf else udf.asNonNullable() | def builder(e: Seq[Expression]) = if (e.length == $x) { - | ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - | if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - | Some(name), nullable, udfDeterministic = true) + | finalUdf.createScalaUDF(e) | } else { | throw new AnalysisException("Invalid number of arguments for function " + name + | ". Expected: $x; Found: " + e.length) | } | functionRegistry.createOrReplaceTempFunction(name, builder) - | val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - | if (nullable) udf else udf.asNonNullable() + | finalUdf |}""".stripMargin) } @@ -170,17 +169,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag](name: String, func: Function0[RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 0) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 0; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -191,17 +189,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag](name: String, func: Function1[A1, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 1) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 1; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -212,17 +209,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag](name: String, func: Function2[A1, A2, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 2) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 2; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -233,17 +229,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](name: String, func: Function3[A1, A2, A3, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 3) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 3; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -254,17 +249,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](name: String, func: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 4) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 4; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -275,17 +269,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](name: String, func: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 5) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 5; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -296,17 +289,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](name: String, func: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 6) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 6; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -317,17 +309,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](name: String, func: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 7) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 7; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -338,17 +329,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](name: String, func: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 8) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 8; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -359,17 +349,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](name: String, func: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 9) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 9; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -380,17 +369,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](name: String, func: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 10) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 10; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -401,17 +389,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag](name: String, func: Function11[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 11) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + 
name + ". Expected: 11; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -422,17 +409,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag](name: String, func: Function12[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 12) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 12; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -443,17 +429,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag](name: String, func: Function13[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Try(ScalaReflection.schemaFor[A13]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 13) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 13; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -464,17 +449,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag](name: String, func: Function14[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Try(ScalaReflection.schemaFor[A13]).toOption :: Try(ScalaReflection.schemaFor[A14]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 14) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 14; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -485,17 +469,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag](name: String, func: Function15[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Try(ScalaReflection.schemaFor[A13]).toOption :: Try(ScalaReflection.schemaFor[A14]).toOption :: Try(ScalaReflection.schemaFor[A15]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 15) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 15; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -506,17 +489,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag](name: String, func: Function16[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Try(ScalaReflection.schemaFor[A13]).toOption :: Try(ScalaReflection.schemaFor[A14]).toOption :: Try(ScalaReflection.schemaFor[A15]).toOption :: Try(ScalaReflection.schemaFor[A16]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 16) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 16; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -527,17 +509,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag](name: String, func: Function17[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Try(ScalaReflection.schemaFor[A13]).toOption :: Try(ScalaReflection.schemaFor[A14]).toOption :: Try(ScalaReflection.schemaFor[A15]).toOption :: Try(ScalaReflection.schemaFor[A16]).toOption :: Try(ScalaReflection.schemaFor[A17]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 17) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 17; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -548,17 +529,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag](name: String, func: Function18[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Try(ScalaReflection.schemaFor[A13]).toOption :: Try(ScalaReflection.schemaFor[A14]).toOption :: Try(ScalaReflection.schemaFor[A15]).toOption :: Try(ScalaReflection.schemaFor[A16]).toOption :: Try(ScalaReflection.schemaFor[A17]).toOption :: Try(ScalaReflection.schemaFor[A18]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 18) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 18; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -569,17 +549,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag](name: String, func: Function19[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Try(ScalaReflection.schemaFor[A13]).toOption :: Try(ScalaReflection.schemaFor[A14]).toOption :: Try(ScalaReflection.schemaFor[A15]).toOption :: Try(ScalaReflection.schemaFor[A16]).toOption :: Try(ScalaReflection.schemaFor[A17]).toOption :: Try(ScalaReflection.schemaFor[A18]).toOption :: Try(ScalaReflection.schemaFor[A19]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 19) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 19; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -590,17 +569,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag](name: String, func: Function20[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Try(ScalaReflection.schemaFor[A13]).toOption :: Try(ScalaReflection.schemaFor[A14]).toOption :: Try(ScalaReflection.schemaFor[A15]).toOption :: Try(ScalaReflection.schemaFor[A16]).toOption :: Try(ScalaReflection.schemaFor[A17]).toOption :: Try(ScalaReflection.schemaFor[A18]).toOption :: Try(ScalaReflection.schemaFor[A19]).toOption :: Try(ScalaReflection.schemaFor[A20]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 20) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 20; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -611,17 +589,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag](name: String, func: Function21[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Try(ScalaReflection.schemaFor[A13]).toOption :: Try(ScalaReflection.schemaFor[A14]).toOption :: Try(ScalaReflection.schemaFor[A15]).toOption :: Try(ScalaReflection.schemaFor[A16]).toOption :: Try(ScalaReflection.schemaFor[A17]).toOption :: Try(ScalaReflection.schemaFor[A18]).toOption :: Try(ScalaReflection.schemaFor[A19]).toOption :: Try(ScalaReflection.schemaFor[A20]).toOption :: Try(ScalaReflection.schemaFor[A21]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 21) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 21; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } /** @@ -632,17 +609,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag, A22: TypeTag](name: String, func: Function22[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Try(ScalaReflection.schemaFor[A1]).toOption :: Try(ScalaReflection.schemaFor[A2]).toOption :: Try(ScalaReflection.schemaFor[A3]).toOption :: Try(ScalaReflection.schemaFor[A4]).toOption :: Try(ScalaReflection.schemaFor[A5]).toOption :: Try(ScalaReflection.schemaFor[A6]).toOption :: Try(ScalaReflection.schemaFor[A7]).toOption :: Try(ScalaReflection.schemaFor[A8]).toOption :: Try(ScalaReflection.schemaFor[A9]).toOption :: Try(ScalaReflection.schemaFor[A10]).toOption :: Try(ScalaReflection.schemaFor[A11]).toOption :: Try(ScalaReflection.schemaFor[A12]).toOption :: Try(ScalaReflection.schemaFor[A13]).toOption :: Try(ScalaReflection.schemaFor[A14]).toOption :: Try(ScalaReflection.schemaFor[A15]).toOption :: Try(ScalaReflection.schemaFor[A16]).toOption :: Try(ScalaReflection.schemaFor[A17]).toOption :: Try(ScalaReflection.schemaFor[A18]).toOption :: Try(ScalaReflection.schemaFor[A19]).toOption :: Try(ScalaReflection.schemaFor[A20]).toOption :: Try(ScalaReflection.schemaFor[A21]).toOption :: Try(ScalaReflection.schemaFor[A22]).toOption :: Nil + val udf = SparkUserDefinedFunction(func, dataType, inputSchemas).withName(name) + val finalUdf = if (nullable) udf else udf.asNonNullable() def builder(e: Seq[Expression]) = if (e.length == 22) { - ScalaUDF(func, dataType, e, inputSchemas.map(_.map(_.nullable).getOrElse(true)), - if (inputSchemas.contains(None)) Nil else inputSchemas.map(_.get.dataType), - Some(name), nullable, udfDeterministic = true) + finalUdf.createScalaUDF(e) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 22; Found: " + e.length) } functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = SparkUserDefinedFunction.create(func, dataType, inputSchemas).withName(name) - if (nullable) udf else udf.asNonNullable() + finalUdf } ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index 901472d8e036..1b2d6c7ffb52 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -20,8 +20,8 @@ package org.apache.spark.sql.expressions import org.apache.spark.annotation.Stable import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.ScalaReflection -import org.apache.spark.sql.catalyst.expressions.ScalaUDF -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} +import org.apache.spark.sql.types.{AnyDataType, DataType} /** * A user-defined function. To create one, use the `udf` functions in `functions`. @@ -88,40 +88,47 @@ sealed abstract class UserDefinedFunction { private[sql] case class SparkUserDefinedFunction( f: AnyRef, dataType: DataType, - inputTypes: Option[Seq[DataType]], - nullableTypes: Option[Seq[Boolean]], + inputSchemas: Seq[Option[ScalaReflection.Schema]], name: Option[String] = None, nullable: Boolean = true, deterministic: Boolean = true) extends UserDefinedFunction { @scala.annotation.varargs override def apply(exprs: Column*): Column = { - // TODO: make sure this class is only instantiated through `SparkUserDefinedFunction.create()` - // and `nullableTypes` is always set. - if (inputTypes.isDefined) { - assert(inputTypes.get.length == nullableTypes.get.length) - } + Column(createScalaUDF(exprs.map(_.expr))) + } + + private[sql] def createScalaUDF(exprs: Seq[Expression]): ScalaUDF = { + // It's possible that some of the inputs don't have a specific type(e.g. `Any`), skip type + // check and null check for them. + val inputTypes = inputSchemas.map(_.map(_.dataType).getOrElse(AnyDataType)) - val inputsNullSafe = nullableTypes.getOrElse { + val inputsNullSafe = if (inputSchemas.isEmpty) { + // This is for backward compatibility of `functions.udf(AnyRef, DataType)`. We need to + // do reflection of the lambda function object and see if its arguments are nullable or not. + // This doesn't work for Scala 2.12 and we should consider removing this workaround, as Spark + // uses Scala 2.12 by default since 3.0. 
ScalaReflection.getParameterTypeNullability(f) + } else { + inputSchemas.map(_.map(_.nullable).getOrElse(true)) } - Column(ScalaUDF( + ScalaUDF( f, dataType, - exprs.map(_.expr), + exprs, inputsNullSafe, - inputTypes.getOrElse(Nil), + inputTypes, udfName = name, nullable = nullable, - udfDeterministic = deterministic)) + udfDeterministic = deterministic) } - override def withName(name: String): UserDefinedFunction = { + override def withName(name: String): SparkUserDefinedFunction = { copy(name = Option(name)) } - override def asNonNullable(): UserDefinedFunction = { + override def asNonNullable(): SparkUserDefinedFunction = { if (!nullable) { this } else { @@ -129,7 +136,7 @@ private[sql] case class SparkUserDefinedFunction( } } - override def asNondeterministic(): UserDefinedFunction = { + override def asNondeterministic(): SparkUserDefinedFunction = { if (!deterministic) { this } else { @@ -137,19 +144,3 @@ private[sql] case class SparkUserDefinedFunction( } } } - -private[sql] object SparkUserDefinedFunction { - - def create( - f: AnyRef, - dataType: DataType, - inputSchemas: Seq[Option[ScalaReflection.Schema]]): UserDefinedFunction = { - val inputTypes = if (inputSchemas.contains(None)) { - None - } else { - Some(inputSchemas.map(_.get.dataType)) - } - val nullableTypes = Some(inputSchemas.map(_.map(_.nullable).getOrElse(true))) - SparkUserDefinedFunction(f, dataType, inputTypes, nullableTypes) - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 645452553e6a..7572cf23cde8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3874,7 +3874,7 @@ object functions { |def udf[$typeTags](f: Function$x[$types]): UserDefinedFunction = { | val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] | val inputSchemas = $inputSchemas - | val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + | val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) | if (nullable) udf else udf.asNonNullable() |}""".stripMargin) } @@ -3897,7 +3897,7 @@ object functions { | */ |def udf(f: UDF$i[$extTypeArgs], returnType: DataType): UserDefinedFunction = { | val func = f$anyCast.call($anyParams) - | SparkUserDefinedFunction.create($funcCall, returnType, inputSchemas = Seq.fill($i)(None)) + | SparkUserDefinedFunction($funcCall, returnType, inputSchemas = Seq.fill($i)(None)) |}""".stripMargin) } @@ -3919,7 +3919,7 @@ object functions { def udf[RT: TypeTag](f: Function0[RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas = Nil - val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -3935,7 +3935,7 @@ object functions { def udf[RT: TypeTag, A1: TypeTag](f: Function1[A1, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas = Try(ScalaReflection.schemaFor(typeTag[A1])).toOption :: Nil - val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -3951,7 +3951,7 @@ object functions { def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag](f: Function2[A1, A2, RT]): UserDefinedFunction = { val 
ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas = Try(ScalaReflection.schemaFor(typeTag[A1])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A2])).toOption :: Nil - val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -3967,7 +3967,7 @@ object functions { def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](f: Function3[A1, A2, A3, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas = Try(ScalaReflection.schemaFor(typeTag[A1])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A2])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A3])).toOption :: Nil - val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -3983,7 +3983,7 @@ object functions { def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](f: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas = Try(ScalaReflection.schemaFor(typeTag[A1])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A2])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A3])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A4])).toOption :: Nil - val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -3999,7 +3999,7 @@ object functions { def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](f: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas = Try(ScalaReflection.schemaFor(typeTag[A1])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A2])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A3])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A4])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A5])).toOption :: Nil - val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -4015,7 +4015,7 @@ object functions { def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](f: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas = Try(ScalaReflection.schemaFor(typeTag[A1])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A2])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A3])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A4])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A5])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A6])).toOption :: Nil - val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -4031,7 +4031,7 @@ object functions { def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](f: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] 
val inputSchemas = Try(ScalaReflection.schemaFor(typeTag[A1])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A2])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A3])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A4])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A5])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A6])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A7])).toOption :: Nil - val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -4047,7 +4047,7 @@ object functions { def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](f: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas = Try(ScalaReflection.schemaFor(typeTag[A1])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A2])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A3])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A4])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A5])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A6])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A7])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A8])).toOption :: Nil - val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -4063,7 +4063,7 @@ object functions { def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](f: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas = Try(ScalaReflection.schemaFor(typeTag[A1])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A2])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A3])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A4])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A5])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A6])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A7])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A8])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A9])).toOption :: Nil - val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -4079,7 +4079,7 @@ object functions { def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](f: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = { val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] val inputSchemas = Try(ScalaReflection.schemaFor(typeTag[A1])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A2])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A3])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A4])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A5])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A6])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A7])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A8])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A9])).toOption :: Try(ScalaReflection.schemaFor(typeTag[A10])).toOption :: Nil - 
val udf = SparkUserDefinedFunction.create(f, dataType, inputSchemas) + val udf = SparkUserDefinedFunction(f, dataType, inputSchemas) if (nullable) udf else udf.asNonNullable() } @@ -4098,7 +4098,7 @@ object functions { */ def udf(f: UDF0[_], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF0[Any]].call() - SparkUserDefinedFunction.create(() => func, returnType, inputSchemas = Seq.fill(0)(None)) + SparkUserDefinedFunction(() => func, returnType, inputSchemas = Seq.fill(0)(None)) } /** @@ -4112,7 +4112,7 @@ object functions { */ def udf(f: UDF1[_, _], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF1[Any, Any]].call(_: Any) - SparkUserDefinedFunction.create(func, returnType, inputSchemas = Seq.fill(1)(None)) + SparkUserDefinedFunction(func, returnType, inputSchemas = Seq.fill(1)(None)) } /** @@ -4126,7 +4126,7 @@ object functions { */ def udf(f: UDF2[_, _, _], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF2[Any, Any, Any]].call(_: Any, _: Any) - SparkUserDefinedFunction.create(func, returnType, inputSchemas = Seq.fill(2)(None)) + SparkUserDefinedFunction(func, returnType, inputSchemas = Seq.fill(2)(None)) } /** @@ -4140,7 +4140,7 @@ object functions { */ def udf(f: UDF3[_, _, _, _], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF3[Any, Any, Any, Any]].call(_: Any, _: Any, _: Any) - SparkUserDefinedFunction.create(func, returnType, inputSchemas = Seq.fill(3)(None)) + SparkUserDefinedFunction(func, returnType, inputSchemas = Seq.fill(3)(None)) } /** @@ -4154,7 +4154,7 @@ object functions { */ def udf(f: UDF4[_, _, _, _, _], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF4[Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any) - SparkUserDefinedFunction.create(func, returnType, inputSchemas = Seq.fill(4)(None)) + SparkUserDefinedFunction(func, returnType, inputSchemas = Seq.fill(4)(None)) } /** @@ -4168,7 +4168,7 @@ object functions { */ def udf(f: UDF5[_, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF5[Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any) - SparkUserDefinedFunction.create(func, returnType, inputSchemas = Seq.fill(5)(None)) + SparkUserDefinedFunction(func, returnType, inputSchemas = Seq.fill(5)(None)) } /** @@ -4182,7 +4182,7 @@ object functions { */ def udf(f: UDF6[_, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF6[Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any) - SparkUserDefinedFunction.create(func, returnType, inputSchemas = Seq.fill(6)(None)) + SparkUserDefinedFunction(func, returnType, inputSchemas = Seq.fill(6)(None)) } /** @@ -4196,7 +4196,7 @@ object functions { */ def udf(f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF7[Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) - SparkUserDefinedFunction.create(func, returnType, inputSchemas = Seq.fill(7)(None)) + SparkUserDefinedFunction(func, returnType, inputSchemas = Seq.fill(7)(None)) } /** @@ -4210,7 +4210,7 @@ object functions { */ def udf(f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF8[Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) - SparkUserDefinedFunction.create(func, 
returnType, inputSchemas = Seq.fill(8)(None)) + SparkUserDefinedFunction(func, returnType, inputSchemas = Seq.fill(8)(None)) } /** @@ -4224,7 +4224,7 @@ object functions { */ def udf(f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF9[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) - SparkUserDefinedFunction.create(func, returnType, inputSchemas = Seq.fill(9)(None)) + SparkUserDefinedFunction(func, returnType, inputSchemas = Seq.fill(9)(None)) } /** @@ -4238,7 +4238,7 @@ object functions { */ def udf(f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { val func = f.asInstanceOf[UDF10[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) - SparkUserDefinedFunction.create(func, returnType, inputSchemas = Seq.fill(10)(None)) + SparkUserDefinedFunction(func, returnType, inputSchemas = Seq.fill(10)(None)) } // scalastyle:on parameter.number @@ -4257,9 +4257,7 @@ object functions { * @since 2.0.0 */ def udf(f: AnyRef, dataType: DataType): UserDefinedFunction = { - // TODO: should call SparkUserDefinedFunction.create() instead but inputSchemas is currently - // unavailable. We may need to create type-safe overloaded versions of udf() methods. - SparkUserDefinedFunction(f, dataType, inputTypes = None, nullableTypes = None) + SparkUserDefinedFunction(f, dataType, inputSchemas = Nil) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index a26d306cff6b..06b9343c3758 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -450,4 +450,19 @@ class UDFSuite extends QueryTest with SharedSQLContext { }) checkAnswer(df2.select(udf2($"col1")), Seq(Row(Map("a" -> "2011000000000002456556")))) } + + test("SPARK-26323 Verify input type check - with udf()") { + val f = udf((x: Long, y: Any) => x) + val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j").select(f($"i", $"j")) + checkAnswer(df, Seq(Row(1L), Row(2L))) + } + + test("SPARK-26323 Verify input type check - with udf.register") { + withTable("t") { + Seq(1 -> "a", 2 -> "b").toDF("i", "j").write.format("json").saveAsTable("t") + spark.udf.register("f", (x: Long, y: Any) => x) + val df = spark.sql("SELECT f(i, j) FROM t") + checkAnswer(df, Seq(Row(1L), Row(2L))) + } + } } From b7113822d5f9a984d30cc7fb3b8920fcf630a96a Mon Sep 17 00:00:00 2001 From: liuxian Date: Tue, 8 Jan 2019 10:45:23 -0600 Subject: [PATCH 0254/1072] [MINOR][WEBUI] Modify the name of the column named "shuffle spill" in the StagePage ## What changes were proposed in this pull request? ![default](https://user-images.githubusercontent.com/24688163/50752687-16463f00-128a-11e9-8ee3-4d156f7631f6.png) For this DAG, it has no shuffle operation, only sorting, and sorting leads to spill. ![default](https://user-images.githubusercontent.com/24688163/50752974-0f6bfc00-128b-11e9-9362-a0f440e02359.png) So I think the name of the column named "shuffle spill" is not all right in the StagePage ## How was this patch tested? Manual testing Closes #23483 from 10110346/shufflespillwebui. 
Authored-by: liuxian Signed-off-by: Sean Owen --- .../resources/org/apache/spark/ui/static/stagepage.js | 8 ++++---- .../org/apache/spark/ui/static/stagespage-template.html | 8 ++++---- .../main/scala/org/apache/spark/ui/jobs/StagePage.scala | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 08de2b0fee03..5b792ffc584d 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -100,10 +100,10 @@ function getColumnNameForTaskMetricSummary(columnKey) { return "Scheduler Delay"; case "diskBytesSpilled": - return "Shuffle spill (disk)"; + return "Spill (disk)"; case "memoryBytesSpilled": - return "Shuffle spill (memory)"; + return "Spill (memory)"; case "shuffleReadMetrics": return "Shuffle Read Size / Records"; @@ -842,7 +842,7 @@ $(document).ready(function () { return ""; } }, - name: "Shuffle Spill (Memory)" + name: "Spill (Memory)" }, { data : function (row, type) { @@ -852,7 +852,7 @@ $(document).ready(function () { return ""; } }, - name: "Shuffle Spill (Disk)" + name: "Spill (Disk)" }, { data : function (row, type) { diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html index 6f950c61b2d6..6b0435bb2028 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html @@ -59,8 +59,8 @@

    Aggregated Metrics by Executor

    Output Size / Records Shuffle Read Size / Records Shuffle Write Size / Records - Shuffle Spill (Memory) - Shuffle Spill (Disk) + Spill (Memory) + Spill (Disk) @@ -111,8 +111,8 @@

    Write Time Shuffle Write Size / Records Shuffle Read Size / Records - Shuffle Spill (Memory) - Shuffle Spill (Disk) + Spill (Memory) + Spill (Disk) Errors diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index a213b764abea..3bca1d574301 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -188,11 +188,11 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We }} {if (hasBytesSpilled(stageData)) {
- Shuffle Spill (Memory): + Spill (Memory): {Utils.bytesToString(stageData.memoryBytesSpilled)}
- Shuffle Spill (Disk): + Spill (Disk): {Utils.bytesToString(stageData.diskBytesSpilled)}
  • }} @@ -797,8 +797,8 @@ private[spark] object ApiHelper { val HEADER_SHUFFLE_REMOTE_READS = "Shuffle Remote Reads" val HEADER_SHUFFLE_WRITE_TIME = "Write Time" val HEADER_SHUFFLE_WRITE_SIZE = "Shuffle Write Size / Records" - val HEADER_MEM_SPILL = "Shuffle Spill (Memory)" - val HEADER_DISK_SPILL = "Shuffle Spill (Disk)" + val HEADER_MEM_SPILL = "Spill (Memory)" + val HEADER_DISK_SPILL = "Spill (Disk)" val HEADER_ERROR = "Errors" private[ui] val COLUMN_TO_INDEX = Map( From c101182b10cffd9314c44eefe4db53ba3d6553b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cattilapiros=E2=80=9D?= Date: Wed, 9 Jan 2019 01:24:47 +0800 Subject: [PATCH 0255/1072] [SPARK-26002][SQL] Fix day of year calculation for Julian calendar days MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? Fixing leap year calculations for date operators (year/month/dayOfYear) where the Julian calendars are used (before 1582-10-04). In a Julian calendar every years which are multiples of 4 are leap years (there is no extra exception for years multiples of 100). ## How was this patch tested? With a unit test ("SPARK-26002: correct day of year calculations for Julian calendar years") which focuses to these corner cases. Manually: ``` scala> sql("select year('1500-01-01')").show() +------------------------------+ |year(CAST(1500-01-01 AS DATE))| +------------------------------+ | 1500| +------------------------------+ scala> sql("select dayOfYear('1100-01-01')").show() +-----------------------------------+ |dayofyear(CAST(1100-01-01 AS DATE))| +-----------------------------------+ | 1| +-----------------------------------+ ``` Closes #23000 from attilapiros/julianOffByDays. Authored-by: “attilapiros” Signed-off-by: Wenchen Fan --- .../sql/catalyst/util/DateTimeUtils.scala | 56 +++++++++++++++---- .../catalyst/util/DateTimeUtilsSuite.scala | 30 ++++++++++ .../resources/sql-tests/inputs/datetime.sql | 2 + .../sql-tests/results/datetime.sql.out | 10 +++- 4 files changed, 86 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 3e5e1fbc2b36..e95117f95cdb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -53,14 +53,30 @@ object DateTimeUtils { final val NANOS_PER_MICROS = 1000L final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L - // number of days in 400 years + // number of days in 400 years by Gregorian calendar final val daysIn400Years: Int = 146097 + + // In the Julian calendar every year that is exactly divisible by 4 is a leap year without any + // exception. But in the Gregorian calendar every year that is exactly divisible by four + // is a leap year, except for years that are exactly divisible by 100, but these centurial years + // are leap years if they are exactly divisible by 400. + // So there are 3 extra days in the Julian calendar within a 400 years cycle compared to the + // Gregorian calendar. 
+ final val extraLeapDaysIn400YearsJulian = 3 + + // number of days in 400 years by Julian calendar + final val daysIn400YearsInJulian: Int = daysIn400Years + extraLeapDaysIn400YearsJulian + // number of days between 1.1.1970 and 1.1.2001 final val to2001 = -11323 // this is year -17999, calculation: 50 * daysIn400Year final val YearZero = -17999 final val toYearZero = to2001 + 7304850 + + // days to year -17999 in Julian calendar + final val toYearZeroInJulian = toYearZero + 49 * extraLeapDaysIn400YearsJulian + final val TimeZoneGMT = TimeZone.getTimeZone("GMT") final val TimeZoneUTC = TimeZone.getTimeZone("UTC") final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12) @@ -575,20 +591,30 @@ object DateTimeUtils { * Return the number of days since the start of 400 year period. * The second year of a 400 year period (year 1) starts on day 365. */ - private[this] def yearBoundary(year: Int): Int = { - year * 365 + ((year / 4 ) - (year / 100) + (year / 400)) + private[this] def yearBoundary(year: Int, isGregorian: Boolean): Int = { + if (isGregorian) { + year * 365 + ((year / 4) - (year / 100) + (year / 400)) + } else { + year * 365 + (year / 4) + } } /** * Calculates the number of years for the given number of days. This depends * on a 400 year period. * @param days days since the beginning of the 400 year period + * @param isGregorian indicates whether leap years should be calculated according to Gregorian + * (or Julian) calendar * @return (number of year, days in year) */ - private[this] def numYears(days: Int): (Int, Int) = { + private[this] def numYears(days: Int, isGregorian: Boolean): (Int, Int) = { val year = days / 365 - val boundary = yearBoundary(year) - if (days > boundary) (year, days - boundary) else (year - 1, days - yearBoundary(year - 1)) + val boundary = yearBoundary(year, isGregorian) + if (days > boundary) { + (year, days - boundary) + } else { + (year - 1, days - yearBoundary(year - 1, isGregorian)) + } } /** @@ -599,18 +625,26 @@ object DateTimeUtils { * equals to the period 1.1.1601 until 31.12.2000. */ private[this] def getYearAndDayInYear(daysSince1970: SQLDate): (Int, Int) = { - // add the difference (in days) between 1.1.1970 and the artificial year 0 (-17999) - var daysSince1970Tmp = daysSince1970 // Since Julian calendar was replaced with the Gregorian calendar, // the 10 days after Oct. 4 were skipped. 
// (1582-10-04) -141428 days since 1970-01-01 if (daysSince1970 <= -141428) { - daysSince1970Tmp -= 10 + getYearAndDayInYear(daysSince1970 - 10, toYearZeroInJulian, daysIn400YearsInJulian, false) + } else { + getYearAndDayInYear(daysSince1970, toYearZero, daysIn400Years, true) } - val daysNormalized = daysSince1970Tmp + toYearZero + } + + private def getYearAndDayInYear( + daysSince1970: SQLDate, + toYearZero: SQLDate, + daysIn400Years: SQLDate, + isGregorian: Boolean): (Int, Int) = { + // add the difference (in days) between 1.1.1970 and the artificial year 0 (-17999) + val daysNormalized = daysSince1970 + toYearZero val numOfQuarterCenturies = daysNormalized / daysIn400Years val daysInThis400 = daysNormalized % daysIn400Years + 1 - val (years, dayInYear) = numYears(daysInThis400) + val (years, dayInYear) = numYears(daysInThis400, isGregorian) val year: Int = (2001 - 20000) + 400 * numOfQuarterCenturies + years (year, dayInYear) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 0182eeb17121..2cb6110e2c09 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -410,6 +410,36 @@ class DateTimeUtilsSuite extends SparkFunSuite { assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 78) } + test("SPARK-26002: correct day of year calculations for Julian calendar years") { + val c = Calendar.getInstance() + c.set(Calendar.MILLISECOND, 0) + (1000 to 1600 by 100).foreach { year => + // January 1 is the 1st day of year. + c.set(year, 0, 1, 0, 0, 0) + assert(getYear(getInUTCDays(c.getTimeInMillis)) === year) + assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 1) + assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 1) + + // March 1 is the 61st day of the year as they are leap years. 
It is true for + // even the multiples of 100 as before 1582-10-4 the Julian calendar leap year calculation + // is used in which every multiples of 4 are leap years + c.set(year, 2, 1, 0, 0, 0) + assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 61) + assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 3) + + // testing leap day (February 29) in leap years + c.set(year, 1, 29, 0, 0, 0) + assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60) + + // For non-leap years: + c.set(year + 1, 2, 1, 0, 0, 0) + assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60) + } + + c.set(1582, 2, 1, 0, 0, 0) + assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60) + } + test("get year") { val c = Calendar.getInstance() c.set(2015, 2, 18, 0, 0, 0) diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 547c2bef02b2..8bd8bc2b94b8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -27,3 +27,5 @@ select current_date = current_date(), current_timestamp = current_timestamp(), a select a, b from ttf2 order by a, current_date; select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), weekday(null), weekday('1582-10-15 13:10:15'); + +select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01'); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 63aa00426ea3..2090633802e2 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 10 +-- Number of queries: 11 -- !query 0 @@ -89,3 +89,11 @@ select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), week struct -- !query 9 output 5 3 5 NULL 4 + + +-- !query 10 +select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01') +-- !query 10 schema +struct +-- !query 10 output +1500 1 1 From 2783e4c45f55f4fc87748d1c4a454bfdf3024156 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Tue, 8 Jan 2019 11:25:33 -0600 Subject: [PATCH 0256/1072] [SPARK-24522][UI] Create filter to apply HTTP security checks consistently. Currently there is code scattered in a bunch of places to do different things related to HTTP security, such as access control, setting security-related headers, and filtering out bad content. This makes it really easy to miss these things when writing new UI code. This change creates a new filter that does all of those things, and makes sure that all servlet handlers that are attached to the UI get the new filter and any user-defined filters consistently. The extent of the actual features should be the same as before. The new filter is added at the end of the filter chain, because authentication is done by custom filters and thus needs to happen first. This means that custom filters see unfiltered HTTP requests - which is actually the current behavior anyway. As a side-effect of some of the code refactoring, handlers added after the initial set also get wrapped with a GzipHandler, which didn't happen before. Tested with added unit tests and in a history server with SPNEGO auth configured. Closes #23302 from vanzin/SPARK-24522. 
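As a rough illustration of the "user-defined filters" mentioned above (not part of this patch): filters listed in `spark.ui.filters` are plain `javax.servlet.Filter` implementations, and after this change they run ahead of the built-in security filter, so they see the raw, unfiltered request. The package, class name, and header below are hypothetical.

```scala
package com.example

import javax.servlet._
import javax.servlet.http.HttpServletRequest

// Hypothetical custom UI filter; anything configured via spark.ui.filters is
// installed before the built-in HttpSecurityFilter, so doFilter() runs before
// the access-control check, security headers, and XSS stripping are applied.
class ExampleAuthFilter extends Filter {
  override def init(config: FilterConfig): Unit = { }
  override def destroy(): Unit = { }

  override def doFilter(req: ServletRequest, res: ServletResponse, chain: FilterChain): Unit = {
    // "X-Example-User" is an assumed header, used only for illustration.
    val user = req.asInstanceOf[HttpServletRequest].getHeader("X-Example-User")
    // ... authenticate `user` here, then continue the chain ...
    chain.doFilter(req, res)
  }
}
```

Such a filter would be enabled with `spark.ui.filters=com.example.ExampleAuthFilter`, with init parameters supplied through the `spark.<filter class>.param.<name>` configuration keys visible in the filter-configuration handling in the JettyUtils diff below.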
Authored-by: Marcelo Vanzin Signed-off-by: Imran Rashid --- .../spark/deploy/history/HistoryPage.scala | 5 +- .../spark/deploy/history/HistoryServer.scala | 8 +- .../deploy/master/ui/ApplicationPage.scala | 3 +- .../spark/deploy/master/ui/MasterPage.scala | 6 +- .../spark/deploy/worker/ui/LogPage.scala | 28 ++-- .../spark/deploy/worker/ui/WorkerWebUI.scala | 1 - .../spark/metrics/sink/MetricsServlet.scala | 2 +- .../spark/status/api/v1/SecurityFilter.scala | 36 ---- .../apache/spark/ui/HttpSecurityFilter.scala | 116 +++++++++++++ .../org/apache/spark/ui/JettyUtils.scala | 154 ++++++++--------- .../scala/org/apache/spark/ui/UIUtils.scala | 21 --- .../scala/org/apache/spark/ui/WebUI.scala | 15 +- .../ui/exec/ExecutorThreadDumpPage.scala | 4 +- .../apache/spark/ui/jobs/AllJobsPage.scala | 16 +- .../org/apache/spark/ui/jobs/JobPage.scala | 3 +- .../org/apache/spark/ui/jobs/JobsTab.scala | 4 +- .../org/apache/spark/ui/jobs/PoolPage.scala | 3 +- .../org/apache/spark/ui/jobs/StagePage.scala | 19 +-- .../org/apache/spark/ui/jobs/StageTable.scala | 15 +- .../org/apache/spark/ui/jobs/StagesTab.scala | 4 +- .../org/apache/spark/ui/storage/RDDPage.scala | 11 +- .../spark/ui/HttpSecurityFilterSuite.scala | 157 ++++++++++++++++++ .../scala/org/apache/spark/ui/UISuite.scala | 147 +++++++++++----- .../org/apache/spark/ui/UIUtilsSuite.scala | 39 ----- .../spark/deploy/mesos/ui/DriverPage.scala | 3 +- .../cluster/YarnSchedulerBackend.scala | 35 +++- .../cluster/YarnSchedulerBackendSuite.scala | 59 ++++++- .../sql/execution/ui/AllExecutionsPage.scala | 19 +-- .../sql/execution/ui/ExecutionPage.scala | 3 +- .../ui/ThriftServerSessionPage.scala | 3 +- .../apache/spark/streaming/ui/BatchPage.scala | 8 +- 31 files changed, 609 insertions(+), 338 deletions(-) delete mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala create mode 100644 core/src/main/scala/org/apache/spark/ui/HttpSecurityFilter.scala create mode 100644 core/src/test/scala/org/apache/spark/ui/HttpSecurityFilterSuite.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala index 00ca4efa4d26..7a8ab7fddd79 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala @@ -27,9 +27,8 @@ import org.apache.spark.ui.{UIUtils, WebUIPage} private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { def render(request: HttpServletRequest): Seq[Node] = { - // stripXSS is called first to remove suspicious characters used in XSS attacks - val requestedIncomplete = - Option(UIUtils.stripXSS(request.getParameter("showIncomplete"))).getOrElse("false").toBoolean + val requestedIncomplete = Option(request.getParameter("showIncomplete")) + .getOrElse("false").toBoolean val displayApplications = parent.getApplicationList() .exists(isApplicationCompleted(_) != requestedIncomplete) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index b9303388638f..ff2ea3b843ee 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -150,17 +150,15 @@ class HistoryServer( ui: SparkUI, completed: Boolean) { assert(serverInfo.isDefined, "HistoryServer must be bound before attaching SparkUIs") - handlers.synchronized { - 
ui.getHandlers.foreach(attachHandler) + ui.getHandlers.foreach { handler => + serverInfo.get.addHandler(handler, ui.securityManager) } } /** Detach a reconstructed UI from this server. Only valid after bind(). */ override def detachSparkUI(appId: String, attemptId: Option[String], ui: SparkUI): Unit = { assert(serverInfo.isDefined, "HistoryServer must be bound before detaching SparkUIs") - handlers.synchronized { - ui.getHandlers.foreach(detachHandler) - } + ui.getHandlers.foreach(detachHandler) provider.onUIDetached(appId, attemptId, ui) } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index fad4e46dc035..bcd7a7e4ccdb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -33,8 +33,7 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") /** Executor details for a particular application */ def render(request: HttpServletRequest): Seq[Node] = { - // stripXSS is called first to remove suspicious characters used in XSS attacks - val appId = UIUtils.stripXSS(request.getParameter("appId")) + val appId = request.getParameter("appId") val state = master.askSync[MasterStateResponse](RequestMasterState) val app = state.activeApps.find(_.id == appId) .getOrElse(state.completedApps.find(_.id == appId).orNull) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index b8afe203fbfa..6701465c023c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -57,10 +57,8 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { private def handleKillRequest(request: HttpServletRequest, action: String => Unit): Unit = { if (parent.killEnabled && parent.master.securityMgr.checkModifyPermissions(request.getRemoteUser)) { - // stripXSS is called first to remove suspicious characters used in XSS attacks - val killFlag = - Option(UIUtils.stripXSS(request.getParameter("terminate"))).getOrElse("false").toBoolean - val id = Option(UIUtils.stripXSS(request.getParameter("id"))) + val killFlag = Option(request.getParameter("terminate")).getOrElse("false").toBoolean + val id = Option(request.getParameter("id")) if (id.isDefined && killFlag) { action(id.get) } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala index 4fca9342c037..4e720a759a1b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala @@ -33,15 +33,13 @@ private[ui] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") with private val supportedLogTypes = Set("stderr", "stdout") private val defaultBytes = 100 * 1024 - // stripXSS is called first to remove suspicious characters used in XSS attacks def renderLog(request: HttpServletRequest): String = { - val appId = Option(UIUtils.stripXSS(request.getParameter("appId"))) - val executorId = Option(UIUtils.stripXSS(request.getParameter("executorId"))) - val driverId = Option(UIUtils.stripXSS(request.getParameter("driverId"))) - val logType = UIUtils.stripXSS(request.getParameter("logType")) - val offset = 
Option(UIUtils.stripXSS(request.getParameter("offset"))).map(_.toLong) - val byteLength = - Option(UIUtils.stripXSS(request.getParameter("byteLength"))).map(_.toInt) + val appId = Option(request.getParameter("appId")) + val executorId = Option(request.getParameter("executorId")) + val driverId = Option(request.getParameter("driverId")) + val logType = request.getParameter("logType") + val offset = Option(request.getParameter("offset")).map(_.toLong) + val byteLength = Option(request.getParameter("byteLength")).map(_.toInt) .getOrElse(defaultBytes) val logDir = (appId, executorId, driverId) match { @@ -58,15 +56,13 @@ private[ui] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") with pre + logText } - // stripXSS is called first to remove suspicious characters used in XSS attacks def render(request: HttpServletRequest): Seq[Node] = { - val appId = Option(UIUtils.stripXSS(request.getParameter("appId"))) - val executorId = Option(UIUtils.stripXSS(request.getParameter("executorId"))) - val driverId = Option(UIUtils.stripXSS(request.getParameter("driverId"))) - val logType = UIUtils.stripXSS(request.getParameter("logType")) - val offset = Option(UIUtils.stripXSS(request.getParameter("offset"))).map(_.toLong) - val byteLength = - Option(UIUtils.stripXSS(request.getParameter("byteLength"))).map(_.toInt) + val appId = Option(request.getParameter("appId")) + val executorId = Option(request.getParameter("executorId")) + val driverId = Option(request.getParameter("driverId")) + val logType = request.getParameter("logType") + val offset = Option(request.getParameter("offset")).map(_.toLong) + val byteLength = Option(request.getParameter("byteLength")).map(_.toInt) .getOrElse(defaultBytes) val (logDir, params, pageName) = (appId, executorId, driverId) match { diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala index ea67b7434a76..54886955b98f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala @@ -50,7 +50,6 @@ class WorkerWebUI( addStaticHandler(WorkerWebUI.STATIC_RESOURCE_BASE) attachHandler(createServletHandler("/log", (request: HttpServletRequest) => logPage.renderLog(request), - worker.securityMgr, worker.conf)) } } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala index 68b58b849064..bea24ca7807e 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala @@ -51,7 +51,7 @@ private[spark] class MetricsServlet( def getHandlers(conf: SparkConf): Array[ServletContextHandler] = { Array[ServletContextHandler]( createServletHandler(servletPath, - new ServletParams(request => getMetricsSnapshot(request), "text/json"), securityMgr, conf) + new ServletParams(request => getMetricsSnapshot(request), "text/json"), conf) ) } diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala b/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala deleted file mode 100644 index 1cd37185d660..000000000000 --- a/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.status.api.v1 - -import javax.ws.rs.container.{ContainerRequestContext, ContainerRequestFilter} -import javax.ws.rs.core.Response -import javax.ws.rs.ext.Provider - -@Provider -private[v1] class SecurityFilter extends ContainerRequestFilter with ApiRequestContext { - override def filter(req: ContainerRequestContext): Unit = { - val user = httpRequest.getRemoteUser() - if (!uiRoot.securityManager.checkUIViewPermissions(user)) { - req.abortWith( - Response - .status(Response.Status.FORBIDDEN) - .entity(raw"""user "$user" is not authorized""") - .build() - ) - } - } -} diff --git a/core/src/main/scala/org/apache/spark/ui/HttpSecurityFilter.scala b/core/src/main/scala/org/apache/spark/ui/HttpSecurityFilter.scala new file mode 100644 index 000000000000..da84fdf8fe14 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/HttpSecurityFilter.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui + +import java.util.{Enumeration, Map => JMap} +import javax.servlet._ +import javax.servlet.http.{HttpServletRequest, HttpServletRequestWrapper, HttpServletResponse} + +import scala.collection.JavaConverters._ + +import org.apache.commons.lang3.StringEscapeUtils + +import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.internal.config._ + +/** + * A servlet filter that implements HTTP security features. The following actions are taken + * for every request: + * + * - perform access control of authenticated requests. + * - check request data for disallowed content (e.g. things that could be used to create XSS + * attacks). + * - set response headers to prevent certain kinds of attacks. + * + * Request parameters are sanitized so that HTML content is escaped, and disallowed content is + * removed. 
+ */ +private class HttpSecurityFilter( + conf: SparkConf, + securityMgr: SecurityManager) extends Filter { + + override def destroy(): Unit = { } + + override def init(config: FilterConfig): Unit = { } + + override def doFilter(req: ServletRequest, res: ServletResponse, chain: FilterChain): Unit = { + val hreq = req.asInstanceOf[HttpServletRequest] + val hres = res.asInstanceOf[HttpServletResponse] + hres.setHeader("Cache-Control", "no-cache, no-store, must-revalidate") + + if (!securityMgr.checkUIViewPermissions(hreq.getRemoteUser())) { + hres.sendError(HttpServletResponse.SC_FORBIDDEN, + "User is not authorized to access this page.") + return + } + + // SPARK-10589 avoid frame-related click-jacking vulnerability, using X-Frame-Options + // (see http://tools.ietf.org/html/rfc7034). By default allow framing only from the + // same origin, but allow framing for a specific named URI. + // Example: spark.ui.allowFramingFrom = https://example.com/ + val xFrameOptionsValue = conf.getOption("spark.ui.allowFramingFrom") + .map { uri => s"ALLOW-FROM $uri" } + .getOrElse("SAMEORIGIN") + + hres.setHeader("X-Frame-Options", xFrameOptionsValue) + hres.setHeader("X-XSS-Protection", conf.get(UI_X_XSS_PROTECTION)) + if (conf.get(UI_X_CONTENT_TYPE_OPTIONS)) { + hres.setHeader("X-Content-Type-Options", "nosniff") + } + if (hreq.getScheme() == "https") { + conf.get(UI_STRICT_TRANSPORT_SECURITY).foreach( + hres.setHeader("Strict-Transport-Security", _)) + } + + chain.doFilter(new XssSafeRequest(hreq), res) + } + +} + +private class XssSafeRequest(req: HttpServletRequest) extends HttpServletRequestWrapper(req) { + + private val NEWLINE_AND_SINGLE_QUOTE_REGEX = raw"(?i)(\r\n|\n|\r|%0D%0A|%0A|%0D|'|%27)".r + + private val parameterMap: Map[String, Array[String]] = { + super.getParameterMap().asScala.map { case (name, values) => + stripXSS(name) -> values.map(stripXSS) + }.toMap + } + + override def getParameterMap(): JMap[String, Array[String]] = parameterMap.asJava + + override def getParameterNames(): Enumeration[String] = { + parameterMap.keys.iterator.asJavaEnumeration + } + + override def getParameterValues(name: String): Array[String] = parameterMap.get(name).orNull + + override def getParameter(name: String): String = { + parameterMap.get(name).flatMap(_.headOption).orNull + } + + private def stripXSS(str: String): String = { + if (str != null) { + // Remove new lines and single quotes, followed by escaping HTML version 4.0 + StringEscapeUtils.escapeHtml4(NEWLINE_AND_SINGLE_QUOTE_REGEX.replaceAllIn(str, "")) + } else { + null + } + } + +} diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index 316af9b79d28..08f5fb937da7 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -18,6 +18,7 @@ package org.apache.spark.ui import java.net.{URI, URL} +import java.util.EnumSet import javax.servlet.DispatcherType import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} @@ -68,43 +69,16 @@ private[spark] object JettyUtils extends Logging { implicit def textResponderToServlet(responder: Responder[String]): ServletParams[String] = new ServletParams(responder, "text/plain") - def createServlet[T <: AnyRef]( + private def createServlet[T <: AnyRef]( servletParams: ServletParams[T], - securityMgr: SecurityManager, conf: SparkConf): HttpServlet = { - - // SPARK-10589 avoid frame-related click-jacking vulnerability, using X-Frame-Options - 
// (see http://tools.ietf.org/html/rfc7034). By default allow framing only from the - // same origin, but allow framing for a specific named URI. - // Example: spark.ui.allowFramingFrom = https://example.com/ - val allowFramingFrom = conf.getOption("spark.ui.allowFramingFrom") - val xFrameOptionsValue = - allowFramingFrom.map(uri => s"ALLOW-FROM $uri").getOrElse("SAMEORIGIN") - new HttpServlet { override def doGet(request: HttpServletRequest, response: HttpServletResponse) { try { - if (securityMgr.checkUIViewPermissions(request.getRemoteUser)) { - response.setContentType("%s;charset=utf-8".format(servletParams.contentType)) - response.setStatus(HttpServletResponse.SC_OK) - val result = servletParams.responder(request) - response.setHeader("Cache-Control", "no-cache, no-store, must-revalidate") - response.setHeader("X-Frame-Options", xFrameOptionsValue) - response.setHeader("X-XSS-Protection", conf.get(UI_X_XSS_PROTECTION)) - if (conf.get(UI_X_CONTENT_TYPE_OPTIONS)) { - response.setHeader("X-Content-Type-Options", "nosniff") - } - if (request.getScheme == "https") { - conf.get(UI_STRICT_TRANSPORT_SECURITY).foreach( - response.setHeader("Strict-Transport-Security", _)) - } - response.getWriter.print(servletParams.extractFn(result)) - } else { - response.setStatus(HttpServletResponse.SC_FORBIDDEN) - response.setHeader("Cache-Control", "no-cache, no-store, must-revalidate") - response.sendError(HttpServletResponse.SC_FORBIDDEN, - "User is not authorized to access this page.") - } + response.setContentType("%s;charset=utf-8".format(servletParams.contentType)) + response.setStatus(HttpServletResponse.SC_OK) + val result = servletParams.responder(request) + response.getWriter.print(servletParams.extractFn(result)) } catch { case e: IllegalArgumentException => response.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage) @@ -124,10 +98,9 @@ private[spark] object JettyUtils extends Logging { def createServletHandler[T <: AnyRef]( path: String, servletParams: ServletParams[T], - securityMgr: SecurityManager, conf: SparkConf, basePath: String = ""): ServletContextHandler = { - createServletHandler(path, createServlet(servletParams, securityMgr, conf), basePath) + createServletHandler(path, createServlet(servletParams, conf), basePath) } /** Create a context handler that responds to a request with the given path prefix */ @@ -257,36 +230,6 @@ private[spark] object JettyUtils extends Logging { contextHandler } - /** Add filters, if any, to the given list of ServletContextHandlers */ - def addFilters(handlers: Seq[ServletContextHandler], conf: SparkConf) { - val filters: Array[String] = conf.get("spark.ui.filters", "").split(',').map(_.trim()) - filters.foreach { - case filter : String => - if (!filter.isEmpty) { - logInfo(s"Adding filter $filter to ${handlers.map(_.getContextPath).mkString(", ")}.") - val holder : FilterHolder = new FilterHolder() - holder.setClassName(filter) - // Get any parameters for each filter - conf.get("spark." + filter + ".params", "").split(',').map(_.trim()).toSet.foreach { - param: String => - if (!param.isEmpty) { - val parts = param.split("=") - if (parts.length == 2) holder.setInitParameter(parts(0), parts(1)) - } - } - - val prefix = s"spark.$filter.param." 
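// ---------------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): how user-supplied servlet filters are
// configured through SparkConf. The filter class "org.example.MyAuthFilter" and its
// parameter names are hypothetical; the two configuration styles are the ones parsed
// by the surrounding code -- the legacy comma-separated "spark.<filter>.params" string
// and the per-key "spark.<filter>.param.<name>" prefix.
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.ui.filters", "org.example.MyAuthFilter")
  // legacy style: one comma-separated list of key=value pairs
  .set("spark.org.example.MyAuthFilter.params", "realm=EXAMPLE.COM,mode=ui")
  // prefix style: one SparkConf entry per init parameter
  .set("spark.org.example.MyAuthFilter.param.timeout", "30s")
// Both styles end up as init parameters on the FilterHolder registered for the filter.
// ---------------------------------------------------------------------------------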
- conf.getAll - .filter { case (k, v) => k.length() > prefix.length() && k.startsWith(prefix) } - .foreach { case (k, v) => holder.setInitParameter(k.substring(prefix.length()), v) } - - val enumDispatcher = java.util.EnumSet.of(DispatcherType.ASYNC, DispatcherType.ERROR, - DispatcherType.FORWARD, DispatcherType.INCLUDE, DispatcherType.REQUEST) - handlers.foreach { case(handler) => handler.addFilter(holder, "/*", enumDispatcher) } - } - } - } - /** * Attempt to start a Jetty server bound to the supplied hostName:port using the given * context handlers. @@ -298,12 +241,9 @@ private[spark] object JettyUtils extends Logging { hostName: String, port: Int, sslOptions: SSLOptions, - handlers: Seq[ServletContextHandler], conf: SparkConf, serverName: String = ""): ServerInfo = { - addFilters(handlers, conf) - // Start the server first, with no connectors. val pool = new QueuedThreadPool if (serverName.nonEmpty) { @@ -398,16 +338,6 @@ private[spark] object JettyUtils extends Logging { } server.addConnector(httpConnector) - - // Add all the known handlers now that connectors are configured. - handlers.foreach { h => - h.setVirtualHosts(toVirtualHosts(SPARK_CONNECTOR_NAME)) - val gzipHandler = new GzipHandler() - gzipHandler.setHandler(h) - collection.addHandler(gzipHandler) - gzipHandler.start() - } - pool.setMaxThreads(math.max(pool.getMaxThreads, minThreads)) ServerInfo(server, httpPort, securePort, conf, collection) } catch { @@ -489,6 +419,16 @@ private[spark] object JettyUtils extends Logging { } } + def addFilter( + handler: ServletContextHandler, + filter: String, + params: Map[String, String]): Unit = { + val holder = new FilterHolder() + holder.setClassName(filter) + params.foreach { case (k, v) => holder.setInitParameter(k, v) } + handler.addFilter(holder, "/*", EnumSet.allOf(classOf[DispatcherType])) + } + // Create a new URI from the arguments, handling IPv6 host encoding and default ports. private def createRedirectURI( scheme: String, server: String, port: Int, path: String, query: String) = { @@ -509,20 +449,37 @@ private[spark] case class ServerInfo( server: Server, boundPort: Int, securePort: Option[Int], - conf: SparkConf, - private val rootHandler: ContextHandlerCollection) { + private val conf: SparkConf, + private val rootHandler: ContextHandlerCollection) extends Logging { - def addHandler(handler: ServletContextHandler): Unit = { + def addHandler( + handler: ServletContextHandler, + securityMgr: SecurityManager): Unit = synchronized { handler.setVirtualHosts(JettyUtils.toVirtualHosts(JettyUtils.SPARK_CONNECTOR_NAME)) - JettyUtils.addFilters(Seq(handler), conf) - rootHandler.addHandler(handler) + addFilters(handler, securityMgr) + + val gzipHandler = new GzipHandler() + gzipHandler.setHandler(handler) + rootHandler.addHandler(gzipHandler) + if (!handler.isStarted()) { handler.start() } + gzipHandler.start() } - def removeHandler(handler: ContextHandler): Unit = { - rootHandler.removeHandler(handler) + def removeHandler(handler: ServletContextHandler): Unit = synchronized { + // Since addHandler() always adds a wrapping gzip handler, find the container handler + // and remove it. 
+ rootHandler.getHandlers() + .find { h => + h.isInstanceOf[GzipHandler] && h.asInstanceOf[GzipHandler].getHandler() == handler + } + .foreach { h => + rootHandler.removeHandler(h) + h.stop() + } + if (handler.isStarted) { handler.stop() } @@ -537,4 +494,33 @@ private[spark] case class ServerInfo( threadPool.asInstanceOf[LifeCycle].stop } } + + /** + * Add filters, if any, to the given ServletContextHandlers. Always adds a filter at the end + * of the chain to perform security-related functions. + */ + private def addFilters(handler: ServletContextHandler, securityMgr: SecurityManager): Unit = { + conf.getOption("spark.ui.filters").toSeq.flatMap(Utils.stringToSeq).foreach { filter => + logInfo(s"Adding filter to ${handler.getContextPath()}: $filter") + val oldParams = conf.getOption(s"spark.$filter.params").toSeq + .flatMap(Utils.stringToSeq) + .flatMap { param => + val parts = param.split("=") + if (parts.length == 2) Some(parts(0) -> parts(1)) else None + } + .toMap + + val newParams = conf.getAllWithPrefix(s"spark.$filter.param.").toMap + + JettyUtils.addFilter(handler, filter, oldParams ++ newParams) + } + + // This filter must come after user-installed filters, since that's where authentication + // filters are installed. This means that custom filters will see the request before it's + // been validated by the security filter. + val securityFilter = new HttpSecurityFilter(conf, securityMgr) + val holder = new FilterHolder(securityFilter) + handler.addFilter(holder, "/*", EnumSet.allOf(classOf[DispatcherType])) + } + } diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 60a929375baa..967435030bc4 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -27,8 +27,6 @@ import scala.util.control.NonFatal import scala.xml._ import scala.xml.transform.{RewriteRule, RuleTransformer} -import org.apache.commons.lang3.StringEscapeUtils - import org.apache.spark.internal.Logging import org.apache.spark.ui.scope.RDDOperationGraph @@ -38,8 +36,6 @@ private[spark] object UIUtils extends Logging { val TABLE_CLASS_STRIPED = TABLE_CLASS_NOT_STRIPED + " table-striped" val TABLE_CLASS_STRIPED_SORTABLE = TABLE_CLASS_STRIPED + " sortable" - private val NEWLINE_AND_SINGLE_QUOTE_REGEX = raw"(?i)(\r\n|\n|\r|%0D%0A|%0A|%0D|'|%27)".r - // SimpleDateFormat is not thread-safe. Don't expose it to avoid improper use. 
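// ---------------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the sanitization that this change
// removes from UIUtils is now applied per request by XssSafeRequest.stripXSS above.
// CR/LF sequences and single quotes (literal or percent-encoded) are dropped, then
// the remainder is HTML-escaped. The helper name `sanitize` and the sample inputs
// below are only for illustration.
import org.apache.commons.lang3.StringEscapeUtils

val NEWLINE_AND_SINGLE_QUOTE_REGEX = raw"(?i)(\r\n|\n|\r|%0D%0A|%0A|%0D|'|%27)".r

def sanitize(value: String): String =
  StringEscapeUtils.escapeHtml4(NEWLINE_AND_SINGLE_QUOTE_REGEX.replaceAllIn(value, ""))

// sanitize("executor'1\n") == "executor1"
// sanitize("<img src=x>")  == "&lt;img src=x&gt;"
// ---------------------------------------------------------------------------------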
private val dateFormat = new ThreadLocal[SimpleDateFormat]() { override def initialValue(): SimpleDateFormat = @@ -552,23 +548,6 @@ private[spark] object UIUtils extends Logging { } } - /** - * Remove suspicious characters of user input to prevent Cross-Site scripting (XSS) attacks - * - * For more information about XSS testing: - * https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet and - * https://www.owasp.org/index.php/Testing_for_Reflected_Cross_site_scripting_(OTG-INPVAL-001) - */ - def stripXSS(requestParameter: String): String = { - if (requestParameter == null) { - null - } else { - // Remove new lines and single quotes, followed by escaping HTML version 4.0 - StringEscapeUtils.escapeHtml4( - NEWLINE_AND_SINGLE_QUOTE_REGEX.replaceAllIn(requestParameter, "")) - } - } - def buildErrorResponse(status: Response.Status, msg: String): Response = { Response.status(status).entity(msg).`type`(MediaType.TEXT_PLAIN).build() } diff --git a/core/src/main/scala/org/apache/spark/ui/WebUI.scala b/core/src/main/scala/org/apache/spark/ui/WebUI.scala index 2e43f17e6a8e..ebf8655ce8c2 100644 --- a/core/src/main/scala/org/apache/spark/ui/WebUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/WebUI.scala @@ -58,7 +58,6 @@ private[spark] abstract class WebUI( def getBasePath: String = basePath def getTabs: Seq[WebUITab] = tabs def getHandlers: Seq[ServletContextHandler] = handlers - def getSecurityManager: SecurityManager = securityManager /** Attaches a tab to this UI, along with all of its attached pages. */ def attachTab(tab: WebUITab): Unit = { @@ -81,9 +80,9 @@ private[spark] abstract class WebUI( def attachPage(page: WebUIPage): Unit = { val pagePath = "/" + page.prefix val renderHandler = createServletHandler(pagePath, - (request: HttpServletRequest) => page.render(request), securityManager, conf, basePath) + (request: HttpServletRequest) => page.render(request), conf, basePath) val renderJsonHandler = createServletHandler(pagePath.stripSuffix("/") + "/json", - (request: HttpServletRequest) => page.renderJson(request), securityManager, conf, basePath) + (request: HttpServletRequest) => page.renderJson(request), conf, basePath) attachHandler(renderHandler) attachHandler(renderJsonHandler) val handlers = pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]()) @@ -91,13 +90,13 @@ private[spark] abstract class WebUI( } /** Attaches a handler to this UI. */ - def attachHandler(handler: ServletContextHandler): Unit = { + def attachHandler(handler: ServletContextHandler): Unit = synchronized { handlers += handler - serverInfo.foreach(_.addHandler(handler)) + serverInfo.foreach(_.addHandler(handler, securityManager)) } /** Detaches a handler from this UI. 
*/ - def detachHandler(handler: ServletContextHandler): Unit = { + def detachHandler(handler: ServletContextHandler): Unit = synchronized { handlers -= handler serverInfo.foreach(_.removeHandler(handler)) } @@ -129,7 +128,9 @@ private[spark] abstract class WebUI( assert(serverInfo.isEmpty, s"Attempted to bind $className more than once!") try { val host = Option(conf.getenv("SPARK_LOCAL_IP")).getOrElse("0.0.0.0") - serverInfo = Some(startJettyServer(host, port, sslOptions, handlers, conf, name)) + val server = startJettyServer(host, port, sslOptions, conf, name) + handlers.foreach(server.addHandler(_, securityManager)) + serverInfo = Some(server) logInfo(s"Bound $className to $host, and started at $webUrl") } catch { case e: Exception => diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala index f9713fb5b4a3..a13037b5e24d 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala @@ -28,10 +28,8 @@ private[ui] class ExecutorThreadDumpPage( parent: SparkUITab, sc: Option[SparkContext]) extends WebUIPage("threadDump") { - // stripXSS is called first to remove suspicious characters used in XSS attacks def render(request: HttpServletRequest): Seq[Node] = { - val executorId = - Option(UIUtils.stripXSS(request.getParameter("executorId"))).map { executorId => + val executorId = Option(request.getParameter("executorId")).map { executorId => UIUtils.decodeURLParameter(executorId) }.getOrElse { throw new IllegalArgumentException(s"Missing executorId parameter") diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index 2c22e0555fcb..b35ea5b52549 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -205,21 +205,17 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We jobTag: String, jobs: Seq[v1.JobData], killEnabled: Boolean): Seq[Node] = { - // stripXSS is called to remove suspicious characters used in XSS attacks - val allParameters = request.getParameterMap.asScala.toMap.map { case (k, v) => - UIUtils.stripXSS(k) -> v.map(UIUtils.stripXSS).toSeq - } - val parameterOtherTable = allParameters.filterNot(_._1.startsWith(jobTag)) + val parameterOtherTable = request.getParameterMap().asScala + .filterNot(_._1.startsWith(jobTag)) .map(para => para._1 + "=" + para._2(0)) val someJobHasJobGroup = jobs.exists(_.jobGroup.isDefined) val jobIdTitle = if (someJobHasJobGroup) "Job Id (Job Group)" else "Job Id" - // stripXSS is called first to remove suspicious characters used in XSS attacks - val parameterJobPage = UIUtils.stripXSS(request.getParameter(jobTag + ".page")) - val parameterJobSortColumn = UIUtils.stripXSS(request.getParameter(jobTag + ".sort")) - val parameterJobSortDesc = UIUtils.stripXSS(request.getParameter(jobTag + ".desc")) - val parameterJobPageSize = UIUtils.stripXSS(request.getParameter(jobTag + ".pageSize")) + val parameterJobPage = request.getParameter(jobTag + ".page") + val parameterJobSortColumn = request.getParameter(jobTag + ".sort") + val parameterJobSortDesc = request.getParameter(jobTag + ".desc") + val parameterJobPageSize = request.getParameter(jobTag + ".pageSize") val jobPage = Option(parameterJobPage).map(_.toInt).getOrElse(1) val jobSortColumn = 
Option(parameterJobSortColumn).map { sortColumn => diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index cd82439223b0..46295e73e086 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -184,8 +184,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP } def render(request: HttpServletRequest): Seq[Node] = { - // stripXSS is called first to remove suspicious characters used in XSS attacks - val parameterId = UIUtils.stripXSS(request.getParameter("id")) + val parameterId = request.getParameter("id") require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") val jobId = parameterId.toInt diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala index ff1b75e5c506..37bb292bd595 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala @@ -47,9 +47,7 @@ private[ui] class JobsTab(parent: SparkUI, store: AppStatusStore) def handleKillRequest(request: HttpServletRequest): Unit = { if (killEnabled && parent.securityManager.checkModifyPermissions(request.getRemoteUser)) { - // stripXSS is called first to remove suspicious characters used in XSS attacks - val jobId = Option(UIUtils.stripXSS(request.getParameter("id"))).map(_.toInt) - jobId.foreach { id => + Option(request.getParameter("id")).map(_.toInt).foreach { id => store.asOption(store.job(id)).foreach { job => if (job.status == JobExecutionStatus.RUNNING) { sc.foreach(_.cancelJob(id)) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala index 22a40101e33d..6d2710385d9d 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala @@ -29,8 +29,7 @@ import org.apache.spark.ui.{UIUtils, WebUIPage} private[ui] class PoolPage(parent: StagesTab) extends WebUIPage("pool") { def render(request: HttpServletRequest): Seq[Node] = { - // stripXSS is called first to remove suspicious characters used in XSS attacks - val poolName = Option(UIUtils.stripXSS(request.getParameter("poolname"))).map { poolname => + val poolName = Option(request.getParameter("poolname")).map { poolname => UIUtils.decodeURLParameter(poolname) }.getOrElse { throw new IllegalArgumentException(s"Missing poolname parameter") diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 3bca1d574301..8ec625da042f 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -80,22 +80,19 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We } def render(request: HttpServletRequest): Seq[Node] = { - // stripXSS is called first to remove suspicious characters used in XSS attacks - val parameterId = UIUtils.stripXSS(request.getParameter("id")) + val parameterId = request.getParameter("id") require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") - val parameterAttempt = UIUtils.stripXSS(request.getParameter("attempt")) + val parameterAttempt = request.getParameter("attempt") require(parameterAttempt != null && parameterAttempt.nonEmpty, "Missing attempt 
parameter") - val parameterTaskPage = UIUtils.stripXSS(request.getParameter("task.page")) - val parameterTaskSortColumn = UIUtils.stripXSS(request.getParameter("task.sort")) - val parameterTaskSortDesc = UIUtils.stripXSS(request.getParameter("task.desc")) - val parameterTaskPageSize = UIUtils.stripXSS(request.getParameter("task.pageSize")) + val parameterTaskPage = request.getParameter("task.page") + val parameterTaskSortColumn = request.getParameter("task.sort") + val parameterTaskSortDesc = request.getParameter("task.desc") + val parameterTaskPageSize = request.getParameter("task.pageSize") - val eventTimelineParameterTaskPage = UIUtils.stripXSS( - request.getParameter("task.eventTimelinePageNumber")) - val eventTimelineParameterTaskPageSize = UIUtils.stripXSS( - request.getParameter("task.eventTimelinePageSize")) + val eventTimelineParameterTaskPage = request.getParameter("task.eventTimelinePageNumber") + val eventTimelineParameterTaskPageSize = request.getParameter("task.eventTimelinePageSize") var eventTimelineTaskPage = Option(eventTimelineParameterTaskPage).map(_.toInt).getOrElse(1) var eventTimelineTaskPageSize = Option( eventTimelineParameterTaskPageSize).map(_.toInt).getOrElse(100) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala index 766efc15e26b..330b6422a13a 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala @@ -42,17 +42,14 @@ private[ui] class StageTableBase( isFairScheduler: Boolean, killEnabled: Boolean, isFailedStage: Boolean) { - // stripXSS is called to remove suspicious characters used in XSS attacks - val allParameters = request.getParameterMap.asScala.toMap.map { case (k, v) => - UIUtils.stripXSS(k) -> v.map(UIUtils.stripXSS).toSeq - } - val parameterOtherTable = allParameters.filterNot(_._1.startsWith(stageTag)) + val parameterOtherTable = request.getParameterMap().asScala + .filterNot(_._1.startsWith(stageTag)) .map(para => para._1 + "=" + para._2(0)) - val parameterStagePage = UIUtils.stripXSS(request.getParameter(stageTag + ".page")) - val parameterStageSortColumn = UIUtils.stripXSS(request.getParameter(stageTag + ".sort")) - val parameterStageSortDesc = UIUtils.stripXSS(request.getParameter(stageTag + ".desc")) - val parameterStagePageSize = UIUtils.stripXSS(request.getParameter(stageTag + ".pageSize")) + val parameterStagePage = request.getParameter(stageTag + ".page") + val parameterStageSortColumn = request.getParameter(stageTag + ".sort") + val parameterStageSortDesc = request.getParameter(stageTag + ".desc") + val parameterStagePageSize = request.getParameter(stageTag + ".pageSize") val stagePage = Option(parameterStagePage).map(_.toInt).getOrElse(1) val stageSortColumn = Option(parameterStageSortColumn).map { sortColumn => diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala index 10b032084ce4..e16c337ba164 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala @@ -45,9 +45,7 @@ private[ui] class StagesTab(val parent: SparkUI, val store: AppStatusStore) def handleKillRequest(request: HttpServletRequest): Unit = { if (killEnabled && parent.securityManager.checkModifyPermissions(request.getRemoteUser)) { - // stripXSS is called first to remove suspicious characters used in XSS attacks - val stageId = 
Option(UIUtils.stripXSS(request.getParameter("id"))).map(_.toInt) - stageId.foreach { id => + Option(request.getParameter("id")).map(_.toInt).foreach { id => store.asOption(store.lastStageAttempt(id)).foreach { stage => val status = stage.status if (status == StageStatus.ACTIVE || status == StageStatus.PENDING) { diff --git a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala index 87da290c8305..dde441abe590 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala @@ -31,14 +31,13 @@ import org.apache.spark.util.Utils private[ui] class RDDPage(parent: SparkUITab, store: AppStatusStore) extends WebUIPage("rdd") { def render(request: HttpServletRequest): Seq[Node] = { - // stripXSS is called first to remove suspicious characters used in XSS attacks - val parameterId = UIUtils.stripXSS(request.getParameter("id")) + val parameterId = request.getParameter("id") require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") - val parameterBlockPage = UIUtils.stripXSS(request.getParameter("block.page")) - val parameterBlockSortColumn = UIUtils.stripXSS(request.getParameter("block.sort")) - val parameterBlockSortDesc = UIUtils.stripXSS(request.getParameter("block.desc")) - val parameterBlockPageSize = UIUtils.stripXSS(request.getParameter("block.pageSize")) + val parameterBlockPage = request.getParameter("block.page") + val parameterBlockSortColumn = request.getParameter("block.sort") + val parameterBlockSortDesc = request.getParameter("block.desc") + val parameterBlockPageSize = request.getParameter("block.pageSize") val blockPage = Option(parameterBlockPage).map(_.toInt).getOrElse(1) val blockSortColumn = Option(parameterBlockSortColumn).getOrElse("Block Name") diff --git a/core/src/test/scala/org/apache/spark/ui/HttpSecurityFilterSuite.scala b/core/src/test/scala/org/apache/spark/ui/HttpSecurityFilterSuite.scala new file mode 100644 index 000000000000..f46cc293ed27 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/ui/HttpSecurityFilterSuite.scala @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ui + +import java.util.UUID +import javax.servlet.FilterChain +import javax.servlet.http.{HttpServletRequest, HttpServletResponse} + +import scala.collection.JavaConverters._ + +import org.mockito.ArgumentCaptor +import org.mockito.ArgumentMatchers.{any, eq => meq} +import org.mockito.Mockito.{mock, times, verify, when} + +import org.apache.spark._ +import org.apache.spark.internal.config._ + +class HttpSecurityFilterSuite extends SparkFunSuite { + + test("filter bad user input") { + val badValues = Map( + "encoded" -> "Encoding:base64%0d%0a%0d%0aPGh0bWw%2bjcmlwdD48L2h0bWw%2b", + "alert1" -> """>"'> - + @@ -195,14 +195,14 @@ private[spark] object UIUtils extends Logging { def dataTablesHeaderNodes(request: HttpServletRequest): Seq[Node] = { + "/static/jquery.dataTables.1.10.18.min.css")} type="text/css"/> - + diff --git a/dev/.rat-excludes b/dev/.rat-excludes index ccf266c3218d..7c2a720e8694 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -22,7 +22,7 @@ docs slaves spark-env.cmd bootstrap-tooltip.js -jquery-1.11.1.min.js +jquery-1.12.4.min.js d3.min.js dagre-d3.min.js graphlib-dot.min.js @@ -34,8 +34,8 @@ dataTables.bootstrap.min.js dataTables.rowsGroup.js jquery.blockUI.min.js jquery.cookies.2.2.0.min.js -jquery.dataTables.1.10.4.min.css -jquery.dataTables.1.10.4.min.js +jquery.dataTables.1.10.18.min.css +jquery.dataTables.1.10.18.min.js jquery.mustache.js jsonFormatter.min.css jsonFormatter.min.js diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index cbe4306799d4..63e9a4277562 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -159,7 +159,7 @@

    {{ page.title }}

    - + diff --git a/docs/js/vendor/jquery-1.12.4.min.js b/docs/js/vendor/jquery-1.12.4.min.js new file mode 100755 index 000000000000..e836475870da --- /dev/null +++ b/docs/js/vendor/jquery-1.12.4.min.js @@ -0,0 +1,5 @@ +/*! jQuery v1.12.4 | (c) jQuery Foundation | jquery.org/license */ +!function(a,b){"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){var c=[],d=a.document,e=c.slice,f=c.concat,g=c.push,h=c.indexOf,i={},j=i.toString,k=i.hasOwnProperty,l={},m="1.12.4",n=function(a,b){return new n.fn.init(a,b)},o=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,p=/^-ms-/,q=/-([\da-z])/gi,r=function(a,b){return b.toUpperCase()};n.fn=n.prototype={jquery:m,constructor:n,selector:"",length:0,toArray:function(){return e.call(this)},get:function(a){return null!=a?0>a?this[a+this.length]:this[a]:e.call(this)},pushStack:function(a){var b=n.merge(this.constructor(),a);return b.prevObject=this,b.context=this.context,b},each:function(a){return n.each(this,a)},map:function(a){return this.pushStack(n.map(this,function(b,c){return a.call(b,c,b)}))},slice:function(){return this.pushStack(e.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(a){var b=this.length,c=+a+(0>a?b:0);return this.pushStack(c>=0&&b>c?[this[c]]:[])},end:function(){return this.prevObject||this.constructor()},push:g,sort:c.sort,splice:c.splice},n.extend=n.fn.extend=function(){var a,b,c,d,e,f,g=arguments[0]||{},h=1,i=arguments.length,j=!1;for("boolean"==typeof g&&(j=g,g=arguments[h]||{},h++),"object"==typeof g||n.isFunction(g)||(g={}),h===i&&(g=this,h--);i>h;h++)if(null!=(e=arguments[h]))for(d in e)a=g[d],c=e[d],g!==c&&(j&&c&&(n.isPlainObject(c)||(b=n.isArray(c)))?(b?(b=!1,f=a&&n.isArray(a)?a:[]):f=a&&n.isPlainObject(a)?a:{},g[d]=n.extend(j,f,c)):void 0!==c&&(g[d]=c));return g},n.extend({expando:"jQuery"+(m+Math.random()).replace(/\D/g,""),isReady:!0,error:function(a){throw new Error(a)},noop:function(){},isFunction:function(a){return"function"===n.type(a)},isArray:Array.isArray||function(a){return"array"===n.type(a)},isWindow:function(a){return null!=a&&a==a.window},isNumeric:function(a){var b=a&&a.toString();return!n.isArray(a)&&b-parseFloat(b)+1>=0},isEmptyObject:function(a){var b;for(b in a)return!1;return!0},isPlainObject:function(a){var b;if(!a||"object"!==n.type(a)||a.nodeType||n.isWindow(a))return!1;try{if(a.constructor&&!k.call(a,"constructor")&&!k.call(a.constructor.prototype,"isPrototypeOf"))return!1}catch(c){return!1}if(!l.ownFirst)for(b in a)return k.call(a,b);for(b in a);return void 0===b||k.call(a,b)},type:function(a){return null==a?a+"":"object"==typeof a||"function"==typeof a?i[j.call(a)]||"object":typeof a},globalEval:function(b){b&&n.trim(b)&&(a.execScript||function(b){a.eval.call(a,b)})(b)},camelCase:function(a){return a.replace(p,"ms-").replace(q,r)},nodeName:function(a,b){return a.nodeName&&a.nodeName.toLowerCase()===b.toLowerCase()},each:function(a,b){var c,d=0;if(s(a)){for(c=a.length;c>d;d++)if(b.call(a[d],d,a[d])===!1)break}else for(d in a)if(b.call(a[d],d,a[d])===!1)break;return a},trim:function(a){return null==a?"":(a+"").replace(o,"")},makeArray:function(a,b){var c=b||[];return null!=a&&(s(Object(a))?n.merge(c,"string"==typeof a?[a]:a):g.call(c,a)),c},inArray:function(a,b,c){var d;if(b){if(h)return h.call(b,a,c);for(d=b.length,c=c?0>c?Math.max(0,d+c):c:0;d>c;c++)if(c in 
b&&b[c]===a)return c}return-1},merge:function(a,b){var c=+b.length,d=0,e=a.length;while(c>d)a[e++]=b[d++];if(c!==c)while(void 0!==b[d])a[e++]=b[d++];return a.length=e,a},grep:function(a,b,c){for(var d,e=[],f=0,g=a.length,h=!c;g>f;f++)d=!b(a[f],f),d!==h&&e.push(a[f]);return e},map:function(a,b,c){var d,e,g=0,h=[];if(s(a))for(d=a.length;d>g;g++)e=b(a[g],g,c),null!=e&&h.push(e);else for(g in a)e=b(a[g],g,c),null!=e&&h.push(e);return f.apply([],h)},guid:1,proxy:function(a,b){var c,d,f;return"string"==typeof b&&(f=a[b],b=a,a=f),n.isFunction(a)?(c=e.call(arguments,2),d=function(){return a.apply(b||this,c.concat(e.call(arguments)))},d.guid=a.guid=a.guid||n.guid++,d):void 0},now:function(){return+new Date},support:l}),"function"==typeof Symbol&&(n.fn[Symbol.iterator]=c[Symbol.iterator]),n.each("Boolean Number String Function Array Date RegExp Object Error Symbol".split(" "),function(a,b){i["[object "+b+"]"]=b.toLowerCase()});function s(a){var b=!!a&&"length"in a&&a.length,c=n.type(a);return"function"===c||n.isWindow(a)?!1:"array"===c||0===b||"number"==typeof b&&b>0&&b-1 in a}var t=function(a){var b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u="sizzle"+1*new Date,v=a.document,w=0,x=0,y=ga(),z=ga(),A=ga(),B=function(a,b){return a===b&&(l=!0),0},C=1<<31,D={}.hasOwnProperty,E=[],F=E.pop,G=E.push,H=E.push,I=E.slice,J=function(a,b){for(var c=0,d=a.length;d>c;c++)if(a[c]===b)return c;return-1},K="checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped",L="[\\x20\\t\\r\\n\\f]",M="(?:\\\\.|[\\w-]|[^\\x00-\\xa0])+",N="\\["+L+"*("+M+")(?:"+L+"*([*^$|!~]?=)"+L+"*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|("+M+"))|)"+L+"*\\]",O=":("+M+")(?:\\((('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|((?:\\\\.|[^\\\\()[\\]]|"+N+")*)|.*)\\)|)",P=new RegExp(L+"+","g"),Q=new RegExp("^"+L+"+|((?:^|[^\\\\])(?:\\\\.)*)"+L+"+$","g"),R=new RegExp("^"+L+"*,"+L+"*"),S=new RegExp("^"+L+"*([>+~]|"+L+")"+L+"*"),T=new RegExp("="+L+"*([^\\]'\"]*?)"+L+"*\\]","g"),U=new RegExp(O),V=new RegExp("^"+M+"$"),W={ID:new RegExp("^#("+M+")"),CLASS:new RegExp("^\\.("+M+")"),TAG:new RegExp("^("+M+"|[*])"),ATTR:new RegExp("^"+N),PSEUDO:new RegExp("^"+O),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+L+"*(even|odd|(([+-]|)(\\d*)n|)"+L+"*(?:([+-]|)"+L+"*(\\d+)|))"+L+"*\\)|)","i"),bool:new RegExp("^(?:"+K+")$","i"),needsContext:new RegExp("^"+L+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+L+"*((?:-\\d)?\\d*)"+L+"*\\)|)(?=[^-]|$)","i")},X=/^(?:input|select|textarea|button)$/i,Y=/^h\d$/i,Z=/^[^{]+\{\s*\[native \w/,$=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,_=/[+~]/,aa=/'|\\/g,ba=new RegExp("\\\\([\\da-f]{1,6}"+L+"?|("+L+")|.)","ig"),ca=function(a,b,c){var d="0x"+b-65536;return d!==d||c?b:0>d?String.fromCharCode(d+65536):String.fromCharCode(d>>10|55296,1023&d|56320)},da=function(){m()};try{H.apply(E=I.call(v.childNodes),v.childNodes),E[v.childNodes.length].nodeType}catch(ea){H={apply:E.length?function(a,b){G.apply(a,I.call(b))}:function(a,b){var c=a.length,d=0;while(a[c++]=b[d++]);a.length=c-1}}}function fa(a,b,d,e){var f,h,j,k,l,o,r,s,w=b&&b.ownerDocument,x=b?b.nodeType:9;if(d=d||[],"string"!=typeof a||!a||1!==x&&9!==x&&11!==x)return d;if(!e&&((b?b.ownerDocument||b:v)!==n&&m(b),b=b||n,p)){if(11!==x&&(o=$.exec(a)))if(f=o[1]){if(9===x){if(!(j=b.getElementById(f)))return d;if(j.id===f)return d.push(j),d}else if(w&&(j=w.getElementById(f))&&t(b,j)&&j.id===f)return d.push(j),d}else{if(o[2])return 
H.apply(d,b.getElementsByTagName(a)),d;if((f=o[3])&&c.getElementsByClassName&&b.getElementsByClassName)return H.apply(d,b.getElementsByClassName(f)),d}if(c.qsa&&!A[a+" "]&&(!q||!q.test(a))){if(1!==x)w=b,s=a;else if("object"!==b.nodeName.toLowerCase()){(k=b.getAttribute("id"))?k=k.replace(aa,"\\$&"):b.setAttribute("id",k=u),r=g(a),h=r.length,l=V.test(k)?"#"+k:"[id='"+k+"']";while(h--)r[h]=l+" "+qa(r[h]);s=r.join(","),w=_.test(a)&&oa(b.parentNode)||b}if(s)try{return H.apply(d,w.querySelectorAll(s)),d}catch(y){}finally{k===u&&b.removeAttribute("id")}}}return i(a.replace(Q,"$1"),b,d,e)}function ga(){var a=[];function b(c,e){return a.push(c+" ")>d.cacheLength&&delete b[a.shift()],b[c+" "]=e}return b}function ha(a){return a[u]=!0,a}function ia(a){var b=n.createElement("div");try{return!!a(b)}catch(c){return!1}finally{b.parentNode&&b.parentNode.removeChild(b),b=null}}function ja(a,b){var c=a.split("|"),e=c.length;while(e--)d.attrHandle[c[e]]=b}function ka(a,b){var c=b&&a,d=c&&1===a.nodeType&&1===b.nodeType&&(~b.sourceIndex||C)-(~a.sourceIndex||C);if(d)return d;if(c)while(c=c.nextSibling)if(c===b)return-1;return a?1:-1}function la(a){return function(b){var c=b.nodeName.toLowerCase();return"input"===c&&b.type===a}}function ma(a){return function(b){var c=b.nodeName.toLowerCase();return("input"===c||"button"===c)&&b.type===a}}function na(a){return ha(function(b){return b=+b,ha(function(c,d){var e,f=a([],c.length,b),g=f.length;while(g--)c[e=f[g]]&&(c[e]=!(d[e]=c[e]))})})}function oa(a){return a&&"undefined"!=typeof a.getElementsByTagName&&a}c=fa.support={},f=fa.isXML=function(a){var b=a&&(a.ownerDocument||a).documentElement;return b?"HTML"!==b.nodeName:!1},m=fa.setDocument=function(a){var b,e,g=a?a.ownerDocument||a:v;return g!==n&&9===g.nodeType&&g.documentElement?(n=g,o=n.documentElement,p=!f(n),(e=n.defaultView)&&e.top!==e&&(e.addEventListener?e.addEventListener("unload",da,!1):e.attachEvent&&e.attachEvent("onunload",da)),c.attributes=ia(function(a){return a.className="i",!a.getAttribute("className")}),c.getElementsByTagName=ia(function(a){return a.appendChild(n.createComment("")),!a.getElementsByTagName("*").length}),c.getElementsByClassName=Z.test(n.getElementsByClassName),c.getById=ia(function(a){return o.appendChild(a).id=u,!n.getElementsByName||!n.getElementsByName(u).length}),c.getById?(d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c=b.getElementById(a);return c?[c]:[]}},d.filter.ID=function(a){var b=a.replace(ba,ca);return function(a){return a.getAttribute("id")===b}}):(delete d.find.ID,d.filter.ID=function(a){var b=a.replace(ba,ca);return function(a){var c="undefined"!=typeof a.getAttributeNode&&a.getAttributeNode("id");return c&&c.value===b}}),d.find.TAG=c.getElementsByTagName?function(a,b){return"undefined"!=typeof b.getElementsByTagName?b.getElementsByTagName(a):c.qsa?b.querySelectorAll(a):void 0}:function(a,b){var c,d=[],e=0,f=b.getElementsByTagName(a);if("*"===a){while(c=f[e++])1===c.nodeType&&d.push(c);return d}return f},d.find.CLASS=c.getElementsByClassName&&function(a,b){return"undefined"!=typeof b.getElementsByClassName&&p?b.getElementsByClassName(a):void 
0},r=[],q=[],(c.qsa=Z.test(n.querySelectorAll))&&(ia(function(a){o.appendChild(a).innerHTML="",a.querySelectorAll("[msallowcapture^='']").length&&q.push("[*^$]="+L+"*(?:''|\"\")"),a.querySelectorAll("[selected]").length||q.push("\\["+L+"*(?:value|"+K+")"),a.querySelectorAll("[id~="+u+"-]").length||q.push("~="),a.querySelectorAll(":checked").length||q.push(":checked"),a.querySelectorAll("a#"+u+"+*").length||q.push(".#.+[+~]")}),ia(function(a){var b=n.createElement("input");b.setAttribute("type","hidden"),a.appendChild(b).setAttribute("name","D"),a.querySelectorAll("[name=d]").length&&q.push("name"+L+"*[*^$|!~]?="),a.querySelectorAll(":enabled").length||q.push(":enabled",":disabled"),a.querySelectorAll("*,:x"),q.push(",.*:")})),(c.matchesSelector=Z.test(s=o.matches||o.webkitMatchesSelector||o.mozMatchesSelector||o.oMatchesSelector||o.msMatchesSelector))&&ia(function(a){c.disconnectedMatch=s.call(a,"div"),s.call(a,"[s!='']:x"),r.push("!=",O)}),q=q.length&&new RegExp(q.join("|")),r=r.length&&new RegExp(r.join("|")),b=Z.test(o.compareDocumentPosition),t=b||Z.test(o.contains)?function(a,b){var c=9===a.nodeType?a.documentElement:a,d=b&&b.parentNode;return a===d||!(!d||1!==d.nodeType||!(c.contains?c.contains(d):a.compareDocumentPosition&&16&a.compareDocumentPosition(d)))}:function(a,b){if(b)while(b=b.parentNode)if(b===a)return!0;return!1},B=b?function(a,b){if(a===b)return l=!0,0;var d=!a.compareDocumentPosition-!b.compareDocumentPosition;return d?d:(d=(a.ownerDocument||a)===(b.ownerDocument||b)?a.compareDocumentPosition(b):1,1&d||!c.sortDetached&&b.compareDocumentPosition(a)===d?a===n||a.ownerDocument===v&&t(v,a)?-1:b===n||b.ownerDocument===v&&t(v,b)?1:k?J(k,a)-J(k,b):0:4&d?-1:1)}:function(a,b){if(a===b)return l=!0,0;var c,d=0,e=a.parentNode,f=b.parentNode,g=[a],h=[b];if(!e||!f)return a===n?-1:b===n?1:e?-1:f?1:k?J(k,a)-J(k,b):0;if(e===f)return ka(a,b);c=a;while(c=c.parentNode)g.unshift(c);c=b;while(c=c.parentNode)h.unshift(c);while(g[d]===h[d])d++;return d?ka(g[d],h[d]):g[d]===v?-1:h[d]===v?1:0},n):n},fa.matches=function(a,b){return fa(a,null,null,b)},fa.matchesSelector=function(a,b){if((a.ownerDocument||a)!==n&&m(a),b=b.replace(T,"='$1']"),c.matchesSelector&&p&&!A[b+" "]&&(!r||!r.test(b))&&(!q||!q.test(b)))try{var d=s.call(a,b);if(d||c.disconnectedMatch||a.document&&11!==a.document.nodeType)return d}catch(e){}return fa(b,n,null,[a]).length>0},fa.contains=function(a,b){return(a.ownerDocument||a)!==n&&m(a),t(a,b)},fa.attr=function(a,b){(a.ownerDocument||a)!==n&&m(a);var e=d.attrHandle[b.toLowerCase()],f=e&&D.call(d.attrHandle,b.toLowerCase())?e(a,b,!p):void 0;return void 0!==f?f:c.attributes||!p?a.getAttribute(b):(f=a.getAttributeNode(b))&&f.specified?f.value:null},fa.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)},fa.uniqueSort=function(a){var b,d=[],e=0,f=0;if(l=!c.detectDuplicates,k=!c.sortStable&&a.slice(0),a.sort(B),l){while(b=a[f++])b===a[f]&&(e=d.push(f));while(e--)a.splice(d[e],1)}return k=null,a},e=fa.getText=function(a){var b,c="",d=0,f=a.nodeType;if(f){if(1===f||9===f||11===f){if("string"==typeof a.textContent)return a.textContent;for(a=a.firstChild;a;a=a.nextSibling)c+=e(a)}else if(3===f||4===f)return a.nodeValue}else while(b=a[d++])c+=e(b);return c},d=fa.selectors={cacheLength:50,createPseudo:ha,match:W,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(a){return 
a[1]=a[1].replace(ba,ca),a[3]=(a[3]||a[4]||a[5]||"").replace(ba,ca),"~="===a[2]&&(a[3]=" "+a[3]+" "),a.slice(0,4)},CHILD:function(a){return a[1]=a[1].toLowerCase(),"nth"===a[1].slice(0,3)?(a[3]||fa.error(a[0]),a[4]=+(a[4]?a[5]+(a[6]||1):2*("even"===a[3]||"odd"===a[3])),a[5]=+(a[7]+a[8]||"odd"===a[3])):a[3]&&fa.error(a[0]),a},PSEUDO:function(a){var b,c=!a[6]&&a[2];return W.CHILD.test(a[0])?null:(a[3]?a[2]=a[4]||a[5]||"":c&&U.test(c)&&(b=g(c,!0))&&(b=c.indexOf(")",c.length-b)-c.length)&&(a[0]=a[0].slice(0,b),a[2]=c.slice(0,b)),a.slice(0,3))}},filter:{TAG:function(a){var b=a.replace(ba,ca).toLowerCase();return"*"===a?function(){return!0}:function(a){return a.nodeName&&a.nodeName.toLowerCase()===b}},CLASS:function(a){var b=y[a+" "];return b||(b=new RegExp("(^|"+L+")"+a+"("+L+"|$)"))&&y(a,function(a){return b.test("string"==typeof a.className&&a.className||"undefined"!=typeof a.getAttribute&&a.getAttribute("class")||"")})},ATTR:function(a,b,c){return function(d){var e=fa.attr(d,a);return null==e?"!="===b:b?(e+="","="===b?e===c:"!="===b?e!==c:"^="===b?c&&0===e.indexOf(c):"*="===b?c&&e.indexOf(c)>-1:"$="===b?c&&e.slice(-c.length)===c:"~="===b?(" "+e.replace(P," ")+" ").indexOf(c)>-1:"|="===b?e===c||e.slice(0,c.length+1)===c+"-":!1):!0}},CHILD:function(a,b,c,d,e){var f="nth"!==a.slice(0,3),g="last"!==a.slice(-4),h="of-type"===b;return 1===d&&0===e?function(a){return!!a.parentNode}:function(b,c,i){var j,k,l,m,n,o,p=f!==g?"nextSibling":"previousSibling",q=b.parentNode,r=h&&b.nodeName.toLowerCase(),s=!i&&!h,t=!1;if(q){if(f){while(p){m=b;while(m=m[p])if(h?m.nodeName.toLowerCase()===r:1===m.nodeType)return!1;o=p="only"===a&&!o&&"nextSibling"}return!0}if(o=[g?q.firstChild:q.lastChild],g&&s){m=q,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n&&j[2],m=n&&q.childNodes[n];while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if(1===m.nodeType&&++t&&m===b){k[a]=[w,n,t];break}}else if(s&&(m=b,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n),t===!1)while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if((h?m.nodeName.toLowerCase()===r:1===m.nodeType)&&++t&&(s&&(l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),k[a]=[w,t]),m===b))break;return t-=e,t===d||t%d===0&&t/d>=0}}},PSEUDO:function(a,b){var c,e=d.pseudos[a]||d.setFilters[a.toLowerCase()]||fa.error("unsupported pseudo: "+a);return e[u]?e(b):e.length>1?(c=[a,a,"",b],d.setFilters.hasOwnProperty(a.toLowerCase())?ha(function(a,c){var d,f=e(a,b),g=f.length;while(g--)d=J(a,f[g]),a[d]=!(c[d]=f[g])}):function(a){return e(a,0,c)}):e}},pseudos:{not:ha(function(a){var b=[],c=[],d=h(a.replace(Q,"$1"));return d[u]?ha(function(a,b,c,e){var f,g=d(a,null,e,[]),h=a.length;while(h--)(f=g[h])&&(a[h]=!(b[h]=f))}):function(a,e,f){return b[0]=a,d(b,null,f,c),b[0]=null,!c.pop()}}),has:ha(function(a){return function(b){return fa(a,b).length>0}}),contains:ha(function(a){return a=a.replace(ba,ca),function(b){return(b.textContent||b.innerText||e(b)).indexOf(a)>-1}}),lang:ha(function(a){return V.test(a||"")||fa.error("unsupported lang: "+a),a=a.replace(ba,ca).toLowerCase(),function(b){var c;do if(c=p?b.lang:b.getAttribute("xml:lang")||b.getAttribute("lang"))return c=c.toLowerCase(),c===a||0===c.indexOf(a+"-");while((b=b.parentNode)&&1===b.nodeType);return!1}}),target:function(b){var c=a.location&&a.location.hash;return c&&c.slice(1)===b.id},root:function(a){return a===o},focus:function(a){return a===n.activeElement&&(!n.hasFocus||n.hasFocus())&&!!(a.type||a.href||~a.tabIndex)},enabled:function(a){return 
a.disabled===!1},disabled:function(a){return a.disabled===!0},checked:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&!!a.checked||"option"===b&&!!a.selected},selected:function(a){return a.parentNode&&a.parentNode.selectedIndex,a.selected===!0},empty:function(a){for(a=a.firstChild;a;a=a.nextSibling)if(a.nodeType<6)return!1;return!0},parent:function(a){return!d.pseudos.empty(a)},header:function(a){return Y.test(a.nodeName)},input:function(a){return X.test(a.nodeName)},button:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&"button"===a.type||"button"===b},text:function(a){var b;return"input"===a.nodeName.toLowerCase()&&"text"===a.type&&(null==(b=a.getAttribute("type"))||"text"===b.toLowerCase())},first:na(function(){return[0]}),last:na(function(a,b){return[b-1]}),eq:na(function(a,b,c){return[0>c?c+b:c]}),even:na(function(a,b){for(var c=0;b>c;c+=2)a.push(c);return a}),odd:na(function(a,b){for(var c=1;b>c;c+=2)a.push(c);return a}),lt:na(function(a,b,c){for(var d=0>c?c+b:c;--d>=0;)a.push(d);return a}),gt:na(function(a,b,c){for(var d=0>c?c+b:c;++db;b++)d+=a[b].value;return d}function ra(a,b,c){var d=b.dir,e=c&&"parentNode"===d,f=x++;return b.first?function(b,c,f){while(b=b[d])if(1===b.nodeType||e)return a(b,c,f)}:function(b,c,g){var h,i,j,k=[w,f];if(g){while(b=b[d])if((1===b.nodeType||e)&&a(b,c,g))return!0}else while(b=b[d])if(1===b.nodeType||e){if(j=b[u]||(b[u]={}),i=j[b.uniqueID]||(j[b.uniqueID]={}),(h=i[d])&&h[0]===w&&h[1]===f)return k[2]=h[2];if(i[d]=k,k[2]=a(b,c,g))return!0}}}function sa(a){return a.length>1?function(b,c,d){var e=a.length;while(e--)if(!a[e](b,c,d))return!1;return!0}:a[0]}function ta(a,b,c){for(var d=0,e=b.length;e>d;d++)fa(a,b[d],c);return c}function ua(a,b,c,d,e){for(var f,g=[],h=0,i=a.length,j=null!=b;i>h;h++)(f=a[h])&&(c&&!c(f,d,e)||(g.push(f),j&&b.push(h)));return g}function va(a,b,c,d,e,f){return d&&!d[u]&&(d=va(d)),e&&!e[u]&&(e=va(e,f)),ha(function(f,g,h,i){var j,k,l,m=[],n=[],o=g.length,p=f||ta(b||"*",h.nodeType?[h]:h,[]),q=!a||!f&&b?p:ua(p,m,a,h,i),r=c?e||(f?a:o||d)?[]:g:q;if(c&&c(q,r,h,i),d){j=ua(r,n),d(j,[],h,i),k=j.length;while(k--)(l=j[k])&&(r[n[k]]=!(q[n[k]]=l))}if(f){if(e||a){if(e){j=[],k=r.length;while(k--)(l=r[k])&&j.push(q[k]=l);e(null,r=[],j,i)}k=r.length;while(k--)(l=r[k])&&(j=e?J(f,l):m[k])>-1&&(f[j]=!(g[j]=l))}}else r=ua(r===g?r.splice(o,r.length):r),e?e(null,g,r,i):H.apply(g,r)})}function wa(a){for(var b,c,e,f=a.length,g=d.relative[a[0].type],h=g||d.relative[" "],i=g?1:0,k=ra(function(a){return a===b},h,!0),l=ra(function(a){return J(b,a)>-1},h,!0),m=[function(a,c,d){var e=!g&&(d||c!==j)||((b=c).nodeType?k(a,c,d):l(a,c,d));return b=null,e}];f>i;i++)if(c=d.relative[a[i].type])m=[ra(sa(m),c)];else{if(c=d.filter[a[i].type].apply(null,a[i].matches),c[u]){for(e=++i;f>e;e++)if(d.relative[a[e].type])break;return va(i>1&&sa(m),i>1&&qa(a.slice(0,i-1).concat({value:" "===a[i-2].type?"*":""})).replace(Q,"$1"),c,e>i&&wa(a.slice(i,e)),f>e&&wa(a=a.slice(e)),f>e&&qa(a))}m.push(c)}return sa(m)}function xa(a,b){var c=b.length>0,e=a.length>0,f=function(f,g,h,i,k){var l,o,q,r=0,s="0",t=f&&[],u=[],v=j,x=f||e&&d.find.TAG("*",k),y=w+=null==v?1:Math.random()||.1,z=x.length;for(k&&(j=g===n||g||k);s!==z&&null!=(l=x[s]);s++){if(e&&l){o=0,g||l.ownerDocument===n||(m(l),h=!p);while(q=a[o++])if(q(l,g||n,h)){i.push(l);break}k&&(w=y)}c&&((l=!q&&l)&&r--,f&&t.push(l))}if(r+=s,c&&s!==r){o=0;while(q=b[o++])q(t,u,g,h);if(f){if(r>0)while(s--)t[s]||u[s]||(u[s]=F.call(i));u=ua(u)}H.apply(i,u),k&&!f&&u.length>0&&r+b.length>1&&fa.uniqueSort(i)}return 
k&&(w=y,j=v),t};return c?ha(f):f}return h=fa.compile=function(a,b){var c,d=[],e=[],f=A[a+" "];if(!f){b||(b=g(a)),c=b.length;while(c--)f=wa(b[c]),f[u]?d.push(f):e.push(f);f=A(a,xa(e,d)),f.selector=a}return f},i=fa.select=function(a,b,e,f){var i,j,k,l,m,n="function"==typeof a&&a,o=!f&&g(a=n.selector||a);if(e=e||[],1===o.length){if(j=o[0]=o[0].slice(0),j.length>2&&"ID"===(k=j[0]).type&&c.getById&&9===b.nodeType&&p&&d.relative[j[1].type]){if(b=(d.find.ID(k.matches[0].replace(ba,ca),b)||[])[0],!b)return e;n&&(b=b.parentNode),a=a.slice(j.shift().value.length)}i=W.needsContext.test(a)?0:j.length;while(i--){if(k=j[i],d.relative[l=k.type])break;if((m=d.find[l])&&(f=m(k.matches[0].replace(ba,ca),_.test(j[0].type)&&oa(b.parentNode)||b))){if(j.splice(i,1),a=f.length&&qa(j),!a)return H.apply(e,f),e;break}}}return(n||h(a,o))(f,b,!p,e,!b||_.test(a)&&oa(b.parentNode)||b),e},c.sortStable=u.split("").sort(B).join("")===u,c.detectDuplicates=!!l,m(),c.sortDetached=ia(function(a){return 1&a.compareDocumentPosition(n.createElement("div"))}),ia(function(a){return a.innerHTML="","#"===a.firstChild.getAttribute("href")})||ja("type|href|height|width",function(a,b,c){return c?void 0:a.getAttribute(b,"type"===b.toLowerCase()?1:2)}),c.attributes&&ia(function(a){return a.innerHTML="",a.firstChild.setAttribute("value",""),""===a.firstChild.getAttribute("value")})||ja("value",function(a,b,c){return c||"input"!==a.nodeName.toLowerCase()?void 0:a.defaultValue}),ia(function(a){return null==a.getAttribute("disabled")})||ja(K,function(a,b,c){var d;return c?void 0:a[b]===!0?b.toLowerCase():(d=a.getAttributeNode(b))&&d.specified?d.value:null}),fa}(a);n.find=t,n.expr=t.selectors,n.expr[":"]=n.expr.pseudos,n.uniqueSort=n.unique=t.uniqueSort,n.text=t.getText,n.isXMLDoc=t.isXML,n.contains=t.contains;var u=function(a,b,c){var d=[],e=void 0!==c;while((a=a[b])&&9!==a.nodeType)if(1===a.nodeType){if(e&&n(a).is(c))break;d.push(a)}return d},v=function(a,b){for(var c=[];a;a=a.nextSibling)1===a.nodeType&&a!==b&&c.push(a);return c},w=n.expr.match.needsContext,x=/^<([\w-]+)\s*\/?>(?:<\/\1>|)$/,y=/^.[^:#\[\.,]*$/;function z(a,b,c){if(n.isFunction(b))return n.grep(a,function(a,d){return!!b.call(a,d,a)!==c});if(b.nodeType)return n.grep(a,function(a){return a===b!==c});if("string"==typeof b){if(y.test(b))return n.filter(b,a,c);b=n.filter(b,a)}return n.grep(a,function(a){return n.inArray(a,b)>-1!==c})}n.filter=function(a,b,c){var d=b[0];return c&&(a=":not("+a+")"),1===b.length&&1===d.nodeType?n.find.matchesSelector(d,a)?[d]:[]:n.find.matches(a,n.grep(b,function(a){return 1===a.nodeType}))},n.fn.extend({find:function(a){var b,c=[],d=this,e=d.length;if("string"!=typeof a)return this.pushStack(n(a).filter(function(){for(b=0;e>b;b++)if(n.contains(d[b],this))return!0}));for(b=0;e>b;b++)n.find(a,d[b],c);return c=this.pushStack(e>1?n.unique(c):c),c.selector=this.selector?this.selector+" "+a:a,c},filter:function(a){return this.pushStack(z(this,a||[],!1))},not:function(a){return this.pushStack(z(this,a||[],!0))},is:function(a){return!!z(this,"string"==typeof a&&w.test(a)?n(a):a||[],!1).length}});var A,B=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]*))$/,C=n.fn.init=function(a,b,c){var e,f;if(!a)return this;if(c=c||A,"string"==typeof a){if(e="<"===a.charAt(0)&&">"===a.charAt(a.length-1)&&a.length>=3?[null,a,null]:B.exec(a),!e||!e[1]&&b)return!b||b.jquery?(b||c).find(a):this.constructor(b).find(a);if(e[1]){if(b=b instanceof n?b[0]:b,n.merge(this,n.parseHTML(e[1],b&&b.nodeType?b.ownerDocument||b:d,!0)),x.test(e[1])&&n.isPlainObject(b))for(e in 
[Omitted hunk body: minified jQuery 1.x library source (core selector/traversal, Callbacks/Deferred, ready, data, queue, event, and DOM-manipulation modules). The extracted copy is unrecoverable verbatim: every "<...>" segment inside the library's strings, regular expressions, and comparisons was stripped during extraction as if it were an HTML tag, and the diff's leading "+" markers were lost when the long minified lines were joined.]